PyPI - tpu-inference - Versions diffs - 0.12.0.dev20251222__py3-none-any.whl → 0.12.0.dev20251224__py3-none-any.whl - Mend

tpu-inference 0.12.0.dev20251222__py3-none-any.whl → 0.12.0.dev20251224__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

tests/core/test_dp_scheduler.py +128 -71
tests/e2e/test_data_parallel.py +176 -280
tests/e2e/test_hybrid_kvcache.py +219 -0
tests/e2e/test_speculative_decoding.py +26 -6
tests/layers/jax/test_qwix.py +1 -1
tests/layers/vllm/test_compressed_tensors_w8a8_fp8.py +36 -21
tests/layers/vllm/test_compressed_tensors_w8a8_int8.py +36 -21
tests/layers/vllm/test_mxfp4.py +25 -10
tests/layers/vllm/test_unquantized.py +61 -31
tests/layers/vllm/utils.py +19 -4
tests/models/common/test_model_loader.py +2 -2
tests/models/jax/test_qwen2_5_vl.py +10 -11
tests/runner/test_multimodal_manager.py +3 -3
tests/runner/test_tpu_runner.py +67 -8
tests/runner/test_tpu_runner_dp.py +66 -0
tpu_inference/core/sched/dp_scheduler.py +65 -40
tpu_inference/kernels/mla/v1/kernel.py +7 -26
tpu_inference/layers/common/sharding.py +8 -3
tpu_inference/layers/jax/attention/deepseek_v3_attention.py +3 -3
tpu_inference/layers/jax/attention/gpt_oss_attention.py +3 -3
tpu_inference/layers/jax/attention/llama4_attention.py +3 -4
tpu_inference/layers/jax/sample/sampling.py +1 -1
tpu_inference/layers/vllm/fused_moe.py +51 -47
tpu_inference/layers/vllm/quantization/common.py +14 -13
tpu_inference/layers/vllm/quantization/mxfp4.py +21 -7
tpu_inference/layers/vllm/quantization/unquantized.py +19 -7
tpu_inference/layers/vllm/sharding.py +7 -4
tpu_inference/models/common/model_loader.py +11 -14
tpu_inference/models/jax/llama3.py +13 -10
tpu_inference/models/jax/llama_guard_4.py +1 -1
tpu_inference/models/jax/qwen2.py +3 -2
tpu_inference/models/jax/qwen2_5_vl.py +4 -4
tpu_inference/models/jax/utils/multi_modal_utils.py +4 -4
tpu_inference/models/jax/utils/qwix/qwix_utils.py +3 -3
tpu_inference/models/vllm/vllm_model_wrapper.py +5 -2
tpu_inference/platforms/tpu_platform.py +7 -7
tpu_inference/runner/compilation_manager.py +43 -33
tpu_inference/runner/kv_cache_manager.py +1 -2
tpu_inference/runner/multimodal_manager.py +1 -1
tpu_inference/runner/tpu_runner.py +12 -9
tpu_inference/utils.py +31 -30
tpu_inference/worker/tpu_worker.py +5 -2
{tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/METADATA +1 -1
{tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/RECORD +47 -46
{tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/WHEEL +0 -0
{tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/licenses/LICENSE +0 -0
{tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/top_level.txt +0 -0

tpu_inference/runner/kv_cache_manager.py CHANGED Viewed

@@ -13,7 +13,7 @@
 # limitations under the License.
 import functools
-from typing import TYPE_CHECKING, Dict, List
+from typing import TYPE_CHECKING, List
 import jax
 import jax.numpy as jnp
@@ -212,7 +212,6 @@ class KVCacheManager:
         # uniform page size.
         representative_spec = kv_cache_config.kv_cache_groups[0].kv_cache_spec
         page_size_bytes = representative_spec.page_size_bytes
-        self.runner.layer_name_to_kvcache_index: Dict[str, int] = {}
         kv_caches = self.runner.kv_caches
         num_blocks_list = []
         for i, kv_cache_tensor in enumerate(kv_cache_config.kv_cache_tensors):

tpu_inference/runner/multimodal_manager.py CHANGED Viewed

@@ -148,7 +148,7 @@ class MultiModalManager:
             # 2. A list or tuple (length: num_items) of tensors, each of shape
             # (feature_size, hidden_size) in case the feature size is dynamic
             # depending on the input multimodal items.
-            curr_group_outputs = self.runner.get_multimodal_embeddings_fn(
+            curr_group_outputs = self.runner.embed_multimodal_fn(
                 self.runner.state, image_grid_thw, **batched_mm_inputs)
             sanity_check_mm_encoder_outputs(

tpu_inference/runner/tpu_runner.py CHANGED Viewed

@@ -282,6 +282,9 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
         self._substitute_placeholder_token_fn = _substitute_placeholder_token
         self.execute_model_state: ExecuteModelState | None = None
+        self.kv_caches: list[jax.Array] = []
+        self.layer_name_to_kvcache_index: dict[str, int] = {}
     def _init_random(self):
         if self.model_config.seed is None:
             self.model_config.seed = 0
@@ -508,10 +511,10 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
         multimodal_fns = multimodal_fns or {}
         self.precompile_vision_encoder_fn = multimodal_fns.get(
             "precompile_vision_encoder_fn", None)
-        self.get_multimodal_embeddings_fn = multimodal_fns.get(
-            "get_multimodal_embeddings_fn", None)
-        self.get_input_embeddings_fn = multimodal_fns.get(
-            "get_input_embeddings_fn", None)
+        self.embed_multimodal_fn = multimodal_fns.get("embed_multimodal_fn",
+                                                      None)
+        self.embed_input_ids_fn = multimodal_fns.get("embed_input_ids_fn",
+                                                     None)
         self.get_mrope_input_positions_fn = multimodal_fns.get(
             "get_mrope_input_positions_fn", None)
@@ -523,7 +526,7 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
             jax.random.key(self.model_config.seed)).params()
         self.is_multimodal_model = (
             self.model_config.is_multimodal_model
-            and self.get_multimodal_embeddings_fn is not None and hasattr(
+            and self.embed_multimodal_fn is not None and hasattr(
                 self.model_config.hf_config, "architectures"
             )  #TODO: Remove Llama Guard 4 specific condition once the LG4 Vision portion is implemented
             and len(self.model_config.hf_config.architectures) >= 1
@@ -545,7 +548,6 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
         self.topology_order_id = topology_order_id
         self.kv_cache_config = kv_cache_config
         self.use_hybrid_kvcache = len(kv_cache_config.kv_cache_groups) > 1
-        self.kv_caches = []
         self.kv_cache_manager.initialize_kv_cache(kv_cache_config)
         if has_kv_transfer_group():
             get_kv_transfer_group().register_runner(self)
@@ -827,7 +829,7 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
         sharding = None
         if self.dp_size > 1:
             sharding = NamedSharding(self.mesh,
-                                     PartitionSpec(ShardingAxisName.ATTN_DATA))
+                                     PartitionSpec(ShardingAxisName.MLP_DATA))
         tpu_sampling_metadata = TPUSupportedSamplingMetadata.from_input_batch(
             self.mesh, self.input_batch, padded_num_reqs, sharding=sharding)
@@ -1390,7 +1392,8 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
             self.mesh,
             self.input_batch,
             padded_num_reqs,
-            sharding=data_parallel_attn_sharding,
+            sharding=NamedSharding(self.mesh,
+                                   PartitionSpec(ShardingAxisName.MLP_DATA)),
         )
         if self.uses_mrope:
             positions = mrope_positions
@@ -1680,7 +1683,7 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
     def _get_input_ids_embeds(self, input_ids: jax.Array,
                               mm_embeds: list[jax.Array]):
         if self.is_multimodal_model:
-            inputs_embeds = self.get_input_embeddings_fn(
+            inputs_embeds = self.embed_input_ids_fn(
                 self.state,
                 input_ids,
                 mm_embeds,

tpu_inference/utils.py CHANGED Viewed

@@ -3,7 +3,7 @@ import time
 from collections import defaultdict
 from collections.abc import Sequence
 from functools import wraps
-from typing import Any, Callable, List, Tuple
+from typing import Any, Callable, List, Tuple, Union
 import jax
 import jax.numpy as jnp
@@ -283,35 +283,6 @@ def get_hash_fn_by_name(hash_fn_name: str) -> Callable[[Any], bytes]:
     return utils.hashing.get_hash_fn_by_name(hash_fn_name)
-def quantize_kv(key: jax.Array, value: jax.Array,
-                kv_cache_quantized_dtype: jnp.dtype, k_scale: float,
-                v_scale: float) -> Tuple[jax.Array, jax.Array]:
-    """
-        Quantize the key and value tensors.
-        Args:
-            key: The key tensor to quantize.
-            value: The value tensor to quantize.
-            kv_cache_quantized_dtype: The dtype to quantize the key and value tensors to.
-            q_scale: The scale to quantize the key and value tensors by.
-            k_scale: The scale to quantize the key tensor by.
-            v_scale: The scale to quantize the value tensor by.
-        Returns:
-            Tuple[jax.Array, jax.Array]: The quantized key and value tensors.
-        """
-    dtype_info = jnp.finfo(kv_cache_quantized_dtype)
-    minval, maxval = float(dtype_info.min), float(dtype_info.max)
-    key = key.astype(jnp.float32) / k_scale
-    key = jnp.clip(key, minval, maxval)
-    key = key.astype(kv_cache_quantized_dtype)
-    value = value.astype(jnp.float32) / v_scale
-    value = jnp.clip(value, minval, maxval)
-    value = value.astype(kv_cache_quantized_dtype)
-    return key, value
 def get_jax_dtype_from_str_dtype(str_dtype: str) -> jnp.dtype:
     """
     Get the JAX dtype from a string dtype.
@@ -326,6 +297,36 @@ def get_jax_dtype_from_str_dtype(str_dtype: str) -> jnp.dtype:
     return to_jax_dtype(str_dtype)
+def get_mesh_shape_product(
+    mesh: Mesh,
+    axes: Union[str, list[str], None],
+) -> int:
+    """
+    Get the product of mesh dimensions for one or more axes.
+    Examples:
+        # Single axis (defaults to 1 if not present)
+        get_mesh_shape_product(mesh, "model")
+        # Multiple axes - computes product of their sizes
+        get_mesh_shape_product(mesh, ["model", "attn_dp"])
+        # None means no sharding on this dimension
+        get_mesh_shape_product(mesh, None)  # returns 1
+    """
+    if axes is None:
+        return 1
+    if isinstance(axes, str):
+        axes = [axes]
+    product = 1
+    for axis in axes:
+        product *= mesh.shape.get(axis, 1)
+    return product
 def time_function(func):
     """
     A decorator to measure the execution time of a function.

tpu_inference/worker/tpu_worker.py CHANGED Viewed

@@ -431,8 +431,11 @@ class TPUWorker:
     ) -> None:
         """Allocate GPU KV cache with the specified kv_cache_config."""
         # Precompile functions with large vocab_size tensors before allocating KV cache to avoid OOM
-        self.model_runner.compilation_manager._precompile_sampling()
-        self.model_runner.compilation_manager._precompile_gather_logprobs()
+        if not (envs.SKIP_JAX_PRECOMPILE or
+                (hasattr(self.model_runner.model_config, "enforce_eager")
+                 and self.model_runner.model_config.enforce_eager)):
+            self.model_runner.compilation_manager._precompile_sampling()
+            self.model_runner.compilation_manager._precompile_gather_logprobs()
         self.model_runner.initialize_kv_cache(kv_cache_config,
                                               self.topology_order_id)

{tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tpu_inference
-Version: 0.12.0.dev20251222
+Version: 0.12.0.dev20251224
 Author: tpu_inference Contributors
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers

{tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/RECORD RENAMED Viewed

@@ -7,21 +7,22 @@ tests/core/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tests/core/test_core_tpu.py,sha256=r496rk1eOsK_F4nvm9zprl_T-RcO6eCUb7LuVReOZno,21413
 tests/core/test_disagg_executor.py,sha256=QdE2YZs08EyDDCmSjhiXkXqQ9BJTgO6csr_E1xkkfSg,2256
 tests/core/test_disagg_utils.py,sha256=A5icdqkJlau2PHYAxHfHKuqrlEKXVJu2nm02XOrXjcc,2530
-tests/core/test_dp_scheduler.py,sha256=H2IMS3FhG2hdYhUhdLjIfcVswLOm7rUW5EMsSP9mMc8,32197
+tests/core/test_dp_scheduler.py,sha256=m6ph_OH9tXz6AxNde8cIjptd1lwDVSCqIV2Ef-cNJFk,34253
 tests/core/test_init.py,sha256=5BDDC-dmDtWEGaBPjQSiYJuMiwTBVRSDx9p7Cv8DKyI,2262
 tests/distributed/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tests/distributed/test_distributed_utils.py,sha256=YXKbSG9J72vCrU5mPiFf1ya-Yzc1BjeahdBmQVez8Wc,5031
 tests/distributed/test_tpu_connector.py,sha256=ajKeRUi3x29hQXfLrSlo6yDczpwZsg_mGt2vKBGRZdk,20538
 tests/e2e/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tests/e2e/test_async_scheduler.py,sha256=215xGuyTEBSOe-c1l48TIjrCqhbVFZY3m5p3q5mU7jA,6905
-tests/e2e/test_data_parallel.py,sha256=yiBgGgA4SSmEF_agKKy4sexBatLfNS_Z9uma6fC0dBE,14403
+tests/e2e/test_data_parallel.py,sha256=KB-_BKic_iZyn4WbPWsUdVClinzd8g7PrQ0ui5B-nwo,10725
+tests/e2e/test_hybrid_kvcache.py,sha256=Y7a-grjvAKBbp7vbQncVEQKGM1WxcwO0qa2o0opKiEI,8076
 tests/e2e/test_local_disagg.py,sha256=xIjYI6RGA6bZk4dluklhfYBoJGbHkrSihSkJtPgpZv4,10434
 tests/e2e/test_model_loader.py,sha256=DYlS420KXkNzeIijAf-0UQsYH0pOAGcXRl6P99PBiAc,9366
 tests/e2e/test_multi_modal_inference.py,sha256=hVatj8Rra6XAekp6zBxRivQUcGiV8SimPph9cZ-TJyk,3896
 tests/e2e/test_pipeline_parallel.py,sha256=VpxY9wgQj3-i0XooHZHdmHGdMS3ilmHbxu6ZfyQDUP0,9519
 tests/e2e/test_runai_model_streamer_loader.py,sha256=MXUxKfKV7vVM_LI7-5hBV-wCswogPENkMPsREUjFu3I,3790
 tests/e2e/test_sampling_params.py,sha256=ibLWtJfS35HughdOBtXD2IcyWPXoZA4R4KwXz-RzgOY,10683
-tests/e2e/test_speculative_decoding.py,sha256=XJo3kgXdEGKOYzloqq5b8JB53ECfSxuI-6usvE4ZLho,9158
+tests/e2e/test_speculative_decoding.py,sha256=tj3VSJEi7r9aHjywZanlmfY4eS5Tfr5zPe9TH3PW5EY,9911
 tests/e2e/test_structured_decoding.py,sha256=QYh9WjGrzm7syeLrGUawA6cOkWlQqVpTn7W6qwt65NY,1863
 tests/executors/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tests/executors/test_ray_distributed_executor.py,sha256=rMazBfirGsehEUXgpIPJkw0z7xO4cnK2kzcgxjFA6Bo,8435
@@ -44,7 +45,7 @@ tests/layers/common/test_attention_interface.py,sha256=ke6h-e8CP-FhNY_ojKCYwyHgY
 tests/layers/common/test_quantization.py,sha256=JcwDrNTm6UlBSV3s3mwwvpxOjqBpZDJwnYYoj3DnS7A,5344
 tests/layers/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tests/layers/jax/test_layers.py,sha256=L1xh_wniBtlfudya_WRmHUWOhEno0i6ikKE1XiBtaZs,5010
-tests/layers/jax/test_qwix.py,sha256=G7PrmkWkhQD8P0RvwnF-iyRoXO4d7g1Ce4ycaIjDQ_0,39727
+tests/layers/jax/test_qwix.py,sha256=V8MpFKJb5_evs-Z4WeZ5SxA-KAyFD6Qrex7ExywLxmE,39744
 tests/layers/jax/test_rope.py,sha256=0biwYRSRsKMaRHknc8v8Tfrt0bmJKQGeQLPqR_D04mM,3565
 tests/layers/jax/test_sharding.py,sha256=Hk1MWhIluOKIBx7-O9fKa1n6fF3SW7UMYsRI9AGzp_0,5914
 tests/layers/jax/test_transformer_block.py,sha256=Wpgowc0ZJnv1GUxcK-Op6CCYWjpqgUM0p3EANk-YWzc,5742
@@ -62,12 +63,12 @@ tests/layers/vllm/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs
 tests/layers/vllm/test_attention.py,sha256=NSbeKIi4eQj9RLiHeT-aEDvvsiHYbD3rk4uXq3_5_X8,13193
 tests/layers/vllm/test_awq.py,sha256=0aFURqn3zh0Ueytvfzy6SGon0gPRzk8Dn0DuCnpu_XQ,14479
 tests/layers/vllm/test_compressed_tensors_moe.py,sha256=jlMZcbQWlgaLX4pAlEMjZbJ7a0NyjxIhqXUW5DGH6KM,7385
-tests/layers/vllm/test_compressed_tensors_w8a8_fp8.py,sha256=dkIBrnvOJkmTtoakiuIs66zpeoUSzZNL0i4eoZzdMRM,14347
-tests/layers/vllm/test_compressed_tensors_w8a8_int8.py,sha256=9fRxMVjzrrHCwKsLyShhk5AfnWWE-h2sdjCAjyRyqAE,15381
+tests/layers/vllm/test_compressed_tensors_w8a8_fp8.py,sha256=eTXSWaPcKgOEAyWt7Xqqkhd0fa2J3QFa3QJBRQjTsaY,15521
+tests/layers/vllm/test_compressed_tensors_w8a8_int8.py,sha256=dCKHPos33MIwJnK96zlIUvJYN0PpRPXdyPyo-PnKH3U,16555
 tests/layers/vllm/test_fp8.py,sha256=ZvFTg4Umgg6W2RwElkIZ_Rls_XZJ8sEW7yww2K3ztf4,666
-tests/layers/vllm/test_mxfp4.py,sha256=CTxh0h0iG17Rvk4eM62sTGcNG-1rPp7FNomt_9zMW5o,10828
-tests/layers/vllm/test_unquantized.py,sha256=iP5qWKCiqkb4BIjuZA_PepCxMrXJv9rBZ9y5vxbWZ6M,22446
-tests/layers/vllm/utils.py,sha256=bozp79XO3ofy1I6aIwjEtfo9vmKoOGYIC9YN1ba4sIA,2502
+tests/layers/vllm/test_mxfp4.py,sha256=sFer788F7pbDUtB0yB6WU9Lh9kzUOaxXP6XXVsvQHrc,11625
+tests/layers/vllm/test_unquantized.py,sha256=iqoqib_Rv2DdmKA2ub6T6cIT67PSTc3s7gpYzBTs_qI,24432
+tests/layers/vllm/utils.py,sha256=Qk67IqSrSovhPlWmDGFBr5vwgwtG7kcUzy69-oPgR0A,3105
 tests/lora/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tests/lora/conftest.py,sha256=OI4gPV4vNOCcfE93ccmIWQHd8-Gp9c2yGVlaSnuT4Tg,1559
 tests/lora/test_bgmv.py,sha256=B1HCjh27379vCxZsd8nKMBZ8lr1JamuuWDgYiALyn18,1934
@@ -77,7 +78,7 @@ tests/lora/test_lora_perf.py,sha256=zcZud9Hexx6wa9qX0IvnjKyDD-i61NdIQrVO31Yx3vU,
 tests/lora/utils.py,sha256=rY0tDZEZe58ye4-ykwrTnsiWuLcaEG57N_Rua90bDXI,2726
 tests/models/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tests/models/common/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
-tests/models/common/test_model_loader.py,sha256=78JHAh7zoCccYGwSr6Y2uXAOLY3qircICDszj4sRuic,17402
+tests/models/common/test_model_loader.py,sha256=Sf-k_Kxdjkz-lS_0-ICfA4Yk2VXX33esP8PNG4B7FzA,17392
 tests/models/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tests/models/jax/test_deepseek_v3.py,sha256=9RY6ypfvPts3NOnvWu9n_T7pUjrvj_QY_saLOKpFg4c,16243
 tests/models/jax/test_llama3.py,sha256=NYsT35yh9GzkYYcLcOo1BkBGGr14E89GtdCJJ6SFhI8,6610
@@ -85,7 +86,7 @@ tests/models/jax/test_llama4.py,sha256=MMQzTymnVUdWZ6XoOD8k9Q2ikmAk6tFSGB1C5DCi7
 tests/models/jax/test_llama_eagle3.py,sha256=DCk1ae9SLJUrqyx7uvNOmpqAAM09xb0rYNOst-Leo_M,7777
 tests/models/jax/test_llama_guard_4.py,sha256=w-8cKwuTRFyzDh2mxvAofrt5xUprZyqRm5DRVRamGwE,9322
 tests/models/jax/test_qwen2.py,sha256=xylG-LmHBSy76V-Yl5KiAXogpZPM2w3Mx0E61Ud5sO4,6227
-tests/models/jax/test_qwen2_5_vl.py,sha256=Wy9rlizi0t9Afks-wBWOYbNRnAgvvOOXgoRMiYxjzqE,26339
+tests/models/jax/test_qwen2_5_vl.py,sha256=PfB_gecAvXNrksxt8E56yP6d8ioZZWMoUIvh-OrbzJ4,26299
 tests/models/jax/test_qwen3.py,sha256=NWLAZPwGIhZjW0OADk4JqU4ZPn8JGSGPwkbTQvKEc50,6021
 tests/models/jax/test_weight_loading.py,sha256=RlmByQcjrsefybeNlS9wnL522be6CSR7YLcb7O5eZ-A,5205
 tests/models/jax/utils/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
@@ -97,12 +98,12 @@ tests/runner/test_block_table.py,sha256=gFGF425mpWfOLjnQeQiG18TqFko8vpilJ3AiiiV1
 tests/runner/test_input_batch.py,sha256=7nEkB00JrhaKCKf1ep28iedYbNbuqEdaQAxYqHaXThc,8198
 tests/runner/test_kv_cache.py,sha256=TvxmJNI8lM0ZNllZonHySA8NCQZ7prBgNODpYEI787E,7394
 tests/runner/test_kv_cache_manager.py,sha256=dYVWQamfGwqytnumfvjRt2r3n9BRBqcSbCXGWnw1SXs,22461
-tests/runner/test_multimodal_manager.py,sha256=EmQ_SsfdD7xroKNsN-q2VEs8fLkeQEWLCNxr9fxOnGc,18618
+tests/runner/test_multimodal_manager.py,sha256=8RbHHMvRuHg1Scc0b70tsr-tF2lfk8SZVx3InVgIryc,18591
 tests/runner/test_persistent_batch_manager.py,sha256=EW6P-BtI4i59Clx-Lh84fU1GtDKF3Av2gtO-rCRYN_k,3148
 tests/runner/test_speculative_decoding_manager.py,sha256=HgemtiBL_VhBheUgem3OpPj6yBK9vdJsL8VCABQdGXw,16093
 tests/runner/test_structured_decoding_manager.py,sha256=pVX3z2TLR6SfBoEyRtv0BPajHbMVdcOAe4opMoxEpps,9802
-tests/runner/test_tpu_runner.py,sha256=zR36Jjrb1cOygQnbJ6LlpoWzb8Ix76fHs7jTWH2GVwE,9004
-tests/runner/test_tpu_runner_dp.py,sha256=eGC_pUQzivCjCEeLtI4ZruMY2Qg34Oj6dHhtUKvtyXY,47901
+tests/runner/test_tpu_runner.py,sha256=H1RjGGvNPfNNhglbiUs9J2QsokXaDtnmmtdoYRvA5_8,11649
+tests/runner/test_tpu_runner_dp.py,sha256=TAEmI-JaIodgYNjjjQAAQg-q0bSbeVON5ZZE2jngfOk,50851
 tests/runner/test_tpu_runner_mesh.py,sha256=kDyjdnd0vO4GQrcOAPLr9TEYA49-qDFE4gHt9IL6wlk,8638
 tests/runner/test_utils.py,sha256=_R2bnKttqgg7vfPXP0Qfx38mr-4UBm2UMIbuQFAwgWk,15442
 tests/spec_decode/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
@@ -114,13 +115,13 @@ tpu_inference/env_override.py,sha256=pmL7lfs_rGCP92ya3wuWuudsCYeOMZ6tFZY82A4KkQc
 tpu_inference/envs.py,sha256=A1Bdm5qiXhTdu-Q_yNzBpi79_nOJIDbdFF7MAMqmjxo,6662
 tpu_inference/logger.py,sha256=HQCz7NefmbturuhOC7-3Ixbtcdgoz4g9FHh2RB6o8cc,334
 tpu_inference/tpu_info.py,sha256=lty-ngN1uUvQLlFGkWa2u5eEb5anwmcv_uyI0S95PdY,2840
-tpu_inference/utils.py,sha256=Gx9AKphXvY5ltfXL5DNKEH-I7LN6V4ZIv7cqTgxMtaI,11088
+tpu_inference/utils.py,sha256=0fQXcZJ4IiPGlNv_bLdkla5FeEEKEzyTsSDH-y47ouo,10641
 tpu_inference/core/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/core/core_tpu.py,sha256=WDD3koE_j1QhWS2BbMA2aQOZayPZm4tYPvzL4YCX2jY,33294
 tpu_inference/core/disagg_executor.py,sha256=HZpgYMVxRxm0RQxO4l8IDYBWJ6Z3Tac6xavc5otcirc,4657
 tpu_inference/core/disagg_utils.py,sha256=lv8MAVoAjtcmTaenUXVokg2q3d0tzsma86UiQlQ3omY,1492
 tpu_inference/core/sched/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
-tpu_inference/core/sched/dp_scheduler.py,sha256=b55aIN6EzRpKD5inItO1klHmhRoq0gb1fdnV01N3Nbw,33345
+tpu_inference/core/sched/dp_scheduler.py,sha256=-7d2zopJ5ZJFIJ8LbHsm_4bBBtP7qrim4XWVPDF6vrg,34960
 tpu_inference/distributed/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/distributed/jax_parallel_state.py,sha256=xMK0tEtblh37_LoHvp1-6qPI8AgX4HkE0ATuc7fdHKs,2798
 tpu_inference/distributed/tpu_connector.py,sha256=3rR0y2P1MOOSM8nBfvl95ZQcVKMms3rL8zTdnxUmSms,29946
@@ -144,7 +145,7 @@ tpu_inference/kernels/megablox/common.py,sha256=CoJPNom6anJU9B4i05d2skytJEvNS994
 tpu_inference/kernels/megablox/gmm.py,sha256=rVW70SGPshR9XvHiwzmskX4_yeD4nE8or3RfabwcCLM,24240
 tpu_inference/kernels/mla/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/kernels/mla/v1/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
-tpu_inference/kernels/mla/v1/kernel.py,sha256=jLV3KvNj1sxIuGymKva-qYxVTuO2vxRG12JFbZ6utBs,50796
+tpu_inference/kernels/mla/v1/kernel.py,sha256=oovjb0x3qz08IL_KVjLLbNbcEcFXip55fqgIgfnl3RA,49758
 tpu_inference/kernels/quantized_matmul/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/kernels/quantized_matmul/kernel.py,sha256=-A9Kd2ApHWgPvCaUPfjM5JooLz_iCfWV1UT0taaZaAo,16264
 tpu_inference/kernels/quantized_matmul/tuned_block_sizes.py,sha256=3zhIm73JEE8qOty2_0v3AJlVz13k6qMB5wlXBDyC1EM,35130
@@ -167,7 +168,7 @@ tpu_inference/layers/common/attention_metadata.py,sha256=rmipY517sefHe4owxC5USkm
 tpu_inference/layers/common/binary_search.py,sha256=ZQi-z1wG6WTcfVQXeTGOZokX4K1DSf9kCzqfrhEU8lk,12320
 tpu_inference/layers/common/quant_methods.py,sha256=SCm9g7bE02XSMONmOCuT0vfHeTP6RzGQ57aTj919HgM,772
 tpu_inference/layers/common/quantization.py,sha256=cTuoCpU3qBdPvoy_6R6uwCyz9ojh6esvl9x3bQeMbs4,8710
-tpu_inference/layers/common/sharding.py,sha256=Oh0aFBVXo1eLnfukSSc7ppPo23Rwlf7VGNiMR7Yxtfo,25985
+tpu_inference/layers/common/sharding.py,sha256=curCejZPj8ND4rxjWEbwRozkFYlK_HlpIyTywhDHcWU,26171
 tpu_inference/layers/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/layers/jax/base.py,sha256=UhT4ut_59ynUPdaZGpMPSCQkPTWXA9BxkaPy7lDhoLI,6350
 tpu_inference/layers/jax/constants.py,sha256=YQJOeAbja1yTbPhoOWMp24OF1RCMwPybK1NIwPrrYJ0,3329
@@ -179,28 +180,28 @@ tpu_inference/layers/jax/rope_interface.py,sha256=cPqVpKG5_SU7S7xcrMEaPBJLqi1nC4
 tpu_inference/layers/jax/transformer_block.py,sha256=HTI0fYPQd23UbnJSB_pL2K3un3q_i3guvJiNCUReVRs,4492
 tpu_inference/layers/jax/attention/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/layers/jax/attention/attention.py,sha256=_N5W4ox8EzC1CZYcIhsEi35X8WCIMFEBlSzVtDDcTu8,10623
-tpu_inference/layers/jax/attention/deepseek_v3_attention.py,sha256=DIT6FEEdAdOpH8Qpq0kt4sNCBKRi4xFJ8y6rq5Qp4ag,22370
-tpu_inference/layers/jax/attention/gpt_oss_attention.py,sha256=adzhOODAT-fFp6L6joGfHQEfwhm2PX-jv4lY9mCz85o,9233
-tpu_inference/layers/jax/attention/llama4_attention.py,sha256=Q36xTJe9IZqIGZca4vv4o7OpxzZS6XWXbW4UV-EmNW0,6737
+tpu_inference/layers/jax/attention/deepseek_v3_attention.py,sha256=KP-hgck-wTzTcwDNB08DwNiqsE-6OD4tQ1jLVwWQvEw,22427
+tpu_inference/layers/jax/attention/gpt_oss_attention.py,sha256=EM1kJpr77VHh95aSD5UnSJazB_anS_7PyaD8TixVMrY,9241
+tpu_inference/layers/jax/attention/llama4_attention.py,sha256=QzBDoEioI9mMdI1T2LNlsr89iaGl234e-9s202YWS8M,6713
 tpu_inference/layers/jax/moe/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/layers/jax/moe/deepseek_v3_moe.py,sha256=5j6TJO8fAB2Yv6mVAeM2F9WLe4QDM9bf6zxtdKjHjCQ,26456
 tpu_inference/layers/jax/moe/gpt_oss_moe.py,sha256=-uliFqHJFOTT9WJCEpGhkImOXMSoo3aePXMOmKXlgmk,6771
 tpu_inference/layers/jax/moe/moe.py,sha256=E7L8bJucTVke89o048GAbWdtuQIL5oDz-MkW0NK4E00,10114
 tpu_inference/layers/jax/sample/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/layers/jax/sample/rejection_sampler.py,sha256=VqN0mxi7Xg58w4EXS625ndC8NyA_UZMV9bjFM1mkvrY,21000
-tpu_inference/layers/jax/sample/sampling.py,sha256=kAH3S9LZTPCegkFgJ1XSI5rO64v6jAcc7jr3G5mnY3o,3889
+tpu_inference/layers/jax/sample/sampling.py,sha256=IfJBFSXuTdd0QELn8Opmh7HgdzKreIwGYUOskTFp4aI,3888
 tpu_inference/layers/jax/sample/sampling_metadata.py,sha256=bip7TQcw-VHyN6072zBQY-tA0-QTyJpnuYg04mw9Sv0,3136
 tpu_inference/layers/vllm/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/layers/vllm/attention.py,sha256=LMQbS2KAup0Q-mmN5pzV6uUs-qdGpTSH8eV6ByHde9g,7370
-tpu_inference/layers/vllm/fused_moe.py,sha256=CYHksNA3FYWsNNS5d5dIdkaq6s41EMAB1sIPRSxY7RM,18747
+tpu_inference/layers/vllm/fused_moe.py,sha256=NdMVpDLI5-5274EuhVtH8KZzCnLBqSZSSvRoZqzwY7s,19868
 tpu_inference/layers/vllm/linear_common.py,sha256=--jpy8vf0RkZ0jDU1QfXT-V-RnjIloNSodYQKiw4Txo,9129
-tpu_inference/layers/vllm/sharding.py,sha256=f8iZHQOO4cxl8GO_2_4SKaky9QKXnz6OAtiT40F1Sgo,9744
+tpu_inference/layers/vllm/sharding.py,sha256=f3pu7CJNRkfq5j1bmhmTM5wU9HwAePH3yWeTmaIINAw,9926
 tpu_inference/layers/vllm/quantization/__init__.py,sha256=XYe1VwgoFqLTuLJ-i-64hzNNMSWOkoErLTA_4N_Cze0,2463
 tpu_inference/layers/vllm/quantization/awq.py,sha256=nlWwR08lPlE_HIXLoDiGS2pOAJEiI0ukUGXos0NpbAE,9072
-tpu_inference/layers/vllm/quantization/common.py,sha256=139fpfCNJCBBMUSbmAXs7UjrZA68mfTN7uOGvtUd1yk,4899
+tpu_inference/layers/vllm/quantization/common.py,sha256=GXYEvnhvRF8VWX0SHkzRpV3_LoQgAwCmXwLbEEwSm8A,5021
 tpu_inference/layers/vllm/quantization/fp8.py,sha256=_NT7QOD-N3UAJnYSDJD24Tsp8FaSK6NuDYp78QOTyzo,4530
-tpu_inference/layers/vllm/quantization/mxfp4.py,sha256=JIaMQ8mco7l-rvEgnPAb_HbKJ4sxMqihpIWmodZ_yT0,17581
-tpu_inference/layers/vllm/quantization/unquantized.py,sha256=RVjjRsn6kcKu5RfRySrOHmCV-W8jjIwX2iYit3qygy8,16149
+tpu_inference/layers/vllm/quantization/mxfp4.py,sha256=oS7e40ovqmipDKiHfpezzdP2RaFSNeXw6zv2nTrwKvc,18214
+tpu_inference/layers/vllm/quantization/unquantized.py,sha256=LPGRKw3lkeCHCJsY70P_hDUagnmI5bNe1cHuTFUEBkc,16701
 tpu_inference/layers/vllm/quantization/compressed_tensors/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py,sha256=2RS8owCqKHXZbtWKNjdKtsfzKH9N60UyqD-ug1A83oE,5914
 tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=aOme0LFnhVeXDxdSw-Z0k5MZutM3-EoF2vwffezCARE,11277
@@ -212,49 +213,49 @@ tpu_inference/lora/torch_lora_ops.py,sha256=YR3Hj8nLLiQ-6wXy4uFsjQxFTbJYZ4o5dh_L
 tpu_inference/lora/torch_punica_tpu.py,sha256=qTnXZGLoOgvukSxeunO_SfpPTlkq9GlMj9H7zVYg9LE,12680
 tpu_inference/models/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/models/common/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
-tpu_inference/models/common/model_loader.py,sha256=dzir8-QpqJN680XbBAl3E8eUXeYeniNRwYvZCFvWfBc,21878
+tpu_inference/models/common/model_loader.py,sha256=TnTTION_J3nMGsIMJFuMzoBSl3VHHvhretmF5gMkyXI,21679
 tpu_inference/models/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/models/jax/deepseek_v3.py,sha256=mje3RgxE1NwKWVLgJnPq3ebWB1J8T6YGHT2TtxN10Dg,45031
 tpu_inference/models/jax/gpt_oss.py,sha256=bgdsCx3UcTqEJatWBYbma5HNHH8GEaHN4aL5IsAeSmM,21592
 tpu_inference/models/jax/jax_intermediate_tensor.py,sha256=XKpDgPkOiRtYaPrW76ILxcp2uFfSiE1JMdqHWGo0-Ss,3179
-tpu_inference/models/jax/llama3.py,sha256=WJDldaWgVmeJNg73Nj7A_sD1rmaH6GsgedPOSULxzFo,16093
+tpu_inference/models/jax/llama3.py,sha256=FjTGC69V_EJmvb5BIqYu3V5NS1Pvy-5Pb34kMn5YU5U,16317
 tpu_inference/models/jax/llama4.py,sha256=Ssycb5fcGjhJYg8FfcNckVhow7bvVt0FJbbpHinzMAA,30206
 tpu_inference/models/jax/llama_eagle3.py,sha256=_wnljvb8lLCQ0Z3Vuw0QI7F6b41x6I1WuvstZWGvCYE,13051
-tpu_inference/models/jax/llama_guard_4.py,sha256=3jZP3Pkp-iDpS_9M9x6UNorGaWF9bssT9fi3Jmn4ja0,15850
-tpu_inference/models/jax/qwen2.py,sha256=NkFZTNtJjHjRhasDLtEtvNmH9i_AiM7OBDysLbBhXPk,13985
-tpu_inference/models/jax/qwen2_5_vl.py,sha256=XqvjmUpNmmDT0uRmwTq_nJNoBoLAbPRcbwgllNYkYLc,50408
+tpu_inference/models/jax/llama_guard_4.py,sha256=R4wo45s1JsVD39t8JeAItujGoi-sl43HBH95hr7qEVw,15845
+tpu_inference/models/jax/qwen2.py,sha256=bart2yYGv0J-lNbk8Hk5jn5IF6j_Jp8YKSEjwVU_y24,14038
+tpu_inference/models/jax/qwen2_5_vl.py,sha256=3g3tUt7c83fKOdiMzuq2VyldCyeXoCBGrVYfqyIWwGE,50370
 tpu_inference/models/jax/qwen3.py,sha256=jVOOVrBFnxRIZ_Euo90iCga8rORpz0Kqs79uKqsFwEQ,11678
 tpu_inference/models/jax/utils/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/models/jax/utils/file_utils.py,sha256=8iZcGNvF1N0gNioH8fBlVYTSGYn4fC2WvmlTyeDZyZM,3415
-tpu_inference/models/jax/utils/multi_modal_utils.py,sha256=CptSd8wk0KoXODCjMHjl62bp_41b2DNu2cxcCy4TAEg,6731
+tpu_inference/models/jax/utils/multi_modal_utils.py,sha256=c2LRXdOPi3F779yg2UX-DnuFDxF1JciTcFa09iODxZs,6695
 tpu_inference/models/jax/utils/weight_utils.py,sha256=0xyjGlDSrA09gtb4plw9yX57VPMgn3o5WNl6mXPDU70,23121
 tpu_inference/models/jax/utils/qwix/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
-tpu_inference/models/jax/utils/qwix/qwix_utils.py,sha256=JOl3j4YO0P90ue0vsy-ZzNVGluh-VslAMOI-9wb1Igw,29288
+tpu_inference/models/jax/utils/qwix/qwix_utils.py,sha256=w3wmDb1drJxOK1mVRVMORznqKbtZqFfi7H0Ib_k-iW8,29526
 tpu_inference/models/vllm/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
-tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=UkTyx4IzHUrdnyv0uBpqG6DR0MaPCyRzTNnE_COjhlI,12896
+tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=G4ClHbvMY0gPpTOFWStb1mEVVMzIc3-wz1KXC-mDpj8,13023
 tpu_inference/models/vllm/vllm_model_wrapper_context.py,sha256=vsXQnC2aZ_mHKb-7d9UeN28lfawfApNTm5asUMgEhgo,1762
 tpu_inference/platforms/__init__.py,sha256=BK6rwAhiqVSAUJ9m9EehSKetA6hEPe92flD9Ei076WQ,649
-tpu_inference/platforms/tpu_platform.py,sha256=00BCnL-xO611RfxxTFd2YMu9BvEvdKcGnQ_QTRIKEoM,9789
+tpu_inference/platforms/tpu_platform.py,sha256=bGTH1k0GI5AB_He5IghJtPwuyrhceUQ-rHs41fMpwhI,9826
 tpu_inference/runner/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/runner/block_table.py,sha256=K3Ic8EgPM08d_C5nEN60mxoRydlaQWySAemf_8Q_qVw,4175
-tpu_inference/runner/compilation_manager.py,sha256=TSinw_COpMbLLOdH78e0H3S7F3vStYSRDB6aj_KEi9w,41456
+tpu_inference/runner/compilation_manager.py,sha256=BFjOzJUyEJTmUZAvGCm3yeqoY7Kkw2JKc_A3CzRoN7o,42112
 tpu_inference/runner/input_batch.py,sha256=bx221NX2IOWzrtopss-B-2ZKW4y-U6nQpG09PjpUziw,18273
 tpu_inference/runner/kv_cache.py,sha256=xpB6VTrT3lIq5JNNPJTVEnHFgehIzgxKNIHxxXIxwKI,6046
-tpu_inference/runner/kv_cache_manager.py,sha256=Bd5nMH-KupjeuDpn9pHdV4NzZ7inVa-bSrVGF3AYgRo,23417
+tpu_inference/runner/kv_cache_manager.py,sha256=u6pXaWPzmPe34lXiy-acAdGBmp9WEQrGvksyBfGBRdM,23342
 tpu_inference/runner/lora_utils.py,sha256=DGV_8aMrqb6Q4v7eC0UvipsM-6XQSt1afiZGKTKd6sc,4418
-tpu_inference/runner/multimodal_manager.py,sha256=qkDiQ2_dL5y5gndq3VtctnAHQxBkIC1HGS8haPEKuBo,10353
+tpu_inference/runner/multimodal_manager.py,sha256=dQm0sQ9nGHaWRS8rVPDBZP4P6jNFcJPufnAxv8DoWYs,10344
 tpu_inference/runner/persistent_batch_manager.py,sha256=aCeTyqCgBnQy_6hXjiNLtF81ekG0-YwlQiWeJhx-pdM,13838
 tpu_inference/runner/speculative_decoding_manager.py,sha256=-eSxTIGXbRWRZjHJfikb7kfqbtr_cj7Pca9zInWSn1w,10790
 tpu_inference/runner/structured_decoding_manager.py,sha256=sj1fPrit0qdhcQtDbue5kpxos7zL16_dZQ5YSXTDbzg,4148
-tpu_inference/runner/tpu_runner.py,sha256=gDKkPJtlNqitoKQJb4EtdbUrV8Mzf1JJPRYHAnwGjnI,80544
+tpu_inference/runner/tpu_runner.py,sha256=cgIyZiI3UjpvPWhNRL-mCSnssbbDNt00g5idAzwgWR0,80736
 tpu_inference/runner/utils.py,sha256=lKqL5nxGTk7ufzJRNdp4udn2bPu3jIX52W7akXgSrHc,17133
 tpu_inference/spec_decode/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/spec_decode/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
 tpu_inference/spec_decode/jax/eagle3.py,sha256=5WtEbkgzXpmFz374ibQD5IIcRro4d0SNeCYgBv2nM1c,19678
 tpu_inference/worker/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
-tpu_inference/worker/tpu_worker.py,sha256=ZVfXnSGWqY-dbRpN1lXm8n6CDHUkvIHXjsR4HT74ICU,21456
-tpu_inference-0.12.0.dev20251222.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-tpu_inference-0.12.0.dev20251222.dist-info/METADATA,sha256=X5KHuShBIJ_5jo8GlzI1ji9YHKzdXbvpsk3qQczY5ac,5767
-tpu_inference-0.12.0.dev20251222.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-tpu_inference-0.12.0.dev20251222.dist-info/top_level.txt,sha256=gb1hRIQ3DOawUfVzvPL2E__2KPIl9I0vb5r0xcRBGYQ,20
-tpu_inference-0.12.0.dev20251222.dist-info/RECORD,,
+tpu_inference/worker/tpu_worker.py,sha256=ntwCibPyiw-z8aMUdtu8usqU_q2b0u7diWNOmpjG_6o,21651
+tpu_inference-0.12.0.dev20251224.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+tpu_inference-0.12.0.dev20251224.dist-info/METADATA,sha256=gVLZ-35W1Nw3z2LnxeFYsNQHMRtTM7aUIAuWbxucsBg,5767
+tpu_inference-0.12.0.dev20251224.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+tpu_inference-0.12.0.dev20251224.dist-info/top_level.txt,sha256=gb1hRIQ3DOawUfVzvPL2E__2KPIl9I0vb5r0xcRBGYQ,20
+tpu_inference-0.12.0.dev20251224.dist-info/RECORD,,

{tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/WHEEL RENAMED Viewed

File without changes

{tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/top_level.txt RENAMED Viewed

File without changes

tpu-inference 0.12.0.dev20251222__py3-none-any.whl → 0.12.0.dev20251224__py3-none-any.whl

tpu-inference 0.12.0.dev20251222__py3-none-any.whl → 0.12.0.dev20251224__py3-none-any.whl