PyPI - tpu-inference - Versions diffs - 0.11.1.dev202511270815__py3-none-any.whl → 0.11.1.dev202512030818__py3-none-any.whl - Mend

tpu-inference 0.11.1.dev202511270815__py3-none-any.whl → 0.11.1.dev202512030818__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of tpu-inference might be problematic. Click here for more details.

Files changed (25) hide show

tpu_inference/spec_decode/jax/eagle3.py CHANGED Viewed

@@ -6,6 +6,9 @@ from typing import Any, Optional
 import jax
 import jax.numpy as jnp
 import numpy as np
+from flax import nnx
+from jax import lax
+from jax.sharding import NamedSharding, PartitionSpec
 from vllm.config import VllmConfig
 from tpu_inference.layers.common.attention_metadata import AttentionMetadata
@@ -127,6 +130,17 @@ class Eagle3Proposer:
                                            max_num_blocks_per_req)
         new_block_tables = jnp.where(expanded_exceeds_mask, -1, block_tables)
+        positions = lax.with_sharding_constraint(
+            positions, NamedSharding(self.mesh, PartitionSpec(None, )))
+        clamped_positions = lax.with_sharding_constraint(
+            clamped_positions, NamedSharding(self.mesh, PartitionSpec(None, )))
+        new_seq_lens = lax.with_sharding_constraint(
+            new_seq_lens, NamedSharding(self.mesh, PartitionSpec(None, )))
+        query_start_loc = lax.with_sharding_constraint(
+            query_start_loc, NamedSharding(self.mesh, PartitionSpec()))
+        new_block_tables = lax.with_sharding_constraint(
+            new_block_tables, NamedSharding(self.mesh, PartitionSpec(None, )))
         return positions, clamped_positions, new_seq_lens, query_start_loc, new_block_tables
     @functools.partial(jax.jit, static_argnums=(0, ))
@@ -138,6 +152,7 @@ class Eagle3Proposer:
     @functools.partial(jax.jit, static_argnums=(0, ))
     def _prepare_hidden_states_and_input_ids(
         self,
+        state: nnx.State,
         aux_hidden_states: tuple[jax.Array, ...],
         query_start_loc: jax.Array,
         target_token_ids: jax.Array,
@@ -146,7 +161,7 @@ class Eagle3Proposer:
     ) -> tuple[jax.Array, jax.Array, jax.Array]:
         target_hidden_states = jnp.concatenate(aux_hidden_states, axis=-1)
         target_hidden_states = self.combine_hidden_states_fn(
-            self.state, target_hidden_states)
+            state, target_hidden_states)
         input_ids, last_token_indices = self._prepare_input_ids(
             query_start_loc, target_token_ids, next_token_ids, num_reqs)
@@ -193,8 +208,8 @@ class Eagle3Proposer:
                                     block_tables=device_array(
                                         self.mesh, block_tables))
             target_hidden_states, input_ids, last_token_indices = self._prepare_hidden_states_and_input_ids(
-                aux_hidden_states, attn_metadata.query_start_loc, input_ids,
-                next_token_ids, num_reqs)
+                self.state, aux_hidden_states, attn_metadata.query_start_loc,
+                input_ids, next_token_ids, num_reqs)
             return target_hidden_states, input_ids, last_token_indices, attn_metadata
         # Host copies from the metadata prepared by the runner.
@@ -258,12 +273,13 @@ class Eagle3Proposer:
         attn_metadata = replace(attn_metadata, block_tables=block_tables)
         return self._filter_token_and_prepare_initial_inputs(
-            token_indices, query_start_loc, seq_lens, input_ids,
+            self.state, token_indices, query_start_loc, seq_lens, input_ids,
             aux_hidden_states, attn_metadata, next_token_ids, num_reqs)
     @functools.partial(jax.jit, static_argnums=(0, ))
     def _filter_token_and_prepare_initial_inputs(
         self,
+        state: nnx.State,
         token_indices: jax.Array,
         query_start_loc: jax.Array,
         seq_lens: jax.Array,
@@ -291,35 +307,51 @@ class Eagle3Proposer:
         )
         target_hidden_states, input_ids, last_token_indices = self._prepare_hidden_states_and_input_ids(
-            [h[token_indices] for h in aux_hidden_states], query_start_loc,
-            target_token_ids, next_token_ids, num_reqs)
+            state, [h[token_indices] for h in aux_hidden_states],
+            query_start_loc, target_token_ids, next_token_ids, num_reqs)
         return target_hidden_states, input_ids, last_token_indices, attn_metadata
     @functools.partial(jax.jit, static_argnums=(0, ))
     def _select_draft_token_ids(
         self,
+        state: nnx.State,
         hidden_states: jax.Array,
         last_token_indices: jax.Array,
     ) -> jax.Array:
         sample_hidden_states = hidden_states[last_token_indices]
-        return self._get_draft_token_ids(sample_hidden_states)
+        sample_hidden_states = lax.with_sharding_constraint(
+            sample_hidden_states,
+            NamedSharding(self.mesh, PartitionSpec(None, None)))
+        return self._get_draft_token_ids(state, sample_hidden_states)
     @functools.partial(jax.jit, static_argnums=(0, ))
-    def _get_draft_token_ids(self, hidden_states: jax.Array) -> jax.Array:
+    def _get_draft_token_ids(self, state: nnx.State,
+                             hidden_states: jax.Array) -> jax.Array:
         lora_metadata = None
-        logits = self.compute_logits_fn(self.state, hidden_states,
-                                        lora_metadata)
-        return jnp.argmax(logits, axis=-1)
+        logits = self.compute_logits_fn(state, hidden_states, lora_metadata)
+        draft_token_ids = jnp.argmax(logits, axis=-1)
+        return lax.with_sharding_constraint(
+            draft_token_ids, NamedSharding(self.mesh, PartitionSpec()))
     @functools.partial(jax.jit, static_argnums=(0, ))
     def _select_inputs_for_loop_speculation(
-            self, positions: jax.Array, residual: jax.Array,
+            self, state: nnx.State, positions: jax.Array, residual: jax.Array,
             hidden_states: jax.Array,
             last_token_indices: jax.Array) -> tuple[jax.Array, jax.Array]:
-        return positions[last_token_indices], residual[
-            last_token_indices], self._select_draft_token_ids(
-                hidden_states, last_token_indices)
+        positions = positions[last_token_indices]
+        residual = residual[last_token_indices]
+        draft_token_ids = self._select_draft_token_ids(state, hidden_states,
+                                                       last_token_indices)
+        positions = lax.with_sharding_constraint(
+            positions, NamedSharding(self.mesh, PartitionSpec(None, )))
+        residual = lax.with_sharding_constraint(
+            residual, NamedSharding(self.mesh, PartitionSpec(None, None)))
+        draft_token_ids = lax.with_sharding_constraint(
+            draft_token_ids, NamedSharding(self.mesh, PartitionSpec()))
+        return positions, residual, draft_token_ids
     def propose(
         self,
@@ -346,11 +378,11 @@ class Eagle3Proposer:
         if self.num_speculative_tokens == 1:
             return kv_caches, self._select_draft_token_ids(
-                hidden_states, last_token_indices)
+                self.state, hidden_states, last_token_indices)
         positions, hidden_states, draft_token_ids = self._select_inputs_for_loop_speculation(
-            attn_metadata.input_positions, residual[0], hidden_states,
-            last_token_indices)
+            self.state, attn_metadata.input_positions, residual[0],
+            hidden_states, last_token_indices)
         draft_token_ids_list = [draft_token_ids]
@@ -375,7 +407,8 @@ class Eagle3Proposer:
                 attn_metadata,
             )
             hidden_states = residual[0]
-            draft_token_ids = self._get_draft_token_ids(new_hidden_states)
+            draft_token_ids = self._get_draft_token_ids(
+                self.state, new_hidden_states)
             draft_token_ids_list.append(draft_token_ids)
         # [batch_size, num_speculative_tokens]

tpu_inference/utils.py CHANGED Viewed

@@ -8,11 +8,14 @@ from typing import Any, Callable, List, Tuple
 import jax
 import jax.numpy as jnp
 import numpy as np
+import torch
 from jax._src import dtypes
 from jax._src import mesh as mesh_lib
 from jax._src import xla_bridge as xb
 from jax._src.lib import xla_client as xc
+from jax._src.numpy.scalar_types import _ScalarMeta
 from jax.sharding import Mesh, NamedSharding, PartitionSpec
+from torchax.ops.mappings import j2t_dtype, t2j_dtype
 from vllm import envs as vllm_envs
 from vllm import utils
@@ -23,17 +26,36 @@ GBYTES = 1024 * 1024 * 1024
 TPU_HEAD_SIZE_ALIGNMENT = 128
 TPU_SECOND_LAST_MINOR = 8
-# This is used to translate from a string name for a dtype
-# to formal jax.numpy DType.  One use case for this is
-# converting the `--kv_cache_dtype` flag to a dtype.
-TPU_STR_DTYPE_TO_JAX_DTYPE = {
-    "bfloat16": jnp.bfloat16,
+# Map vllm dtype string that doesn't exactly match jax dtype string name.
+_VLLM_DTYPE_STR_TO_JAX_DTYPE = {
     "fp8": jnp.float8_e4m3fn,
-    "fp8_e4m3": jnp.float8_e4m3,
+    "fp8_e4m3": jnp.float8_e4m3fn,
     "fp8_e5m2": jnp.float8_e5m2,
-    "int8": jnp.int8,
 }
+def to_jax_dtype(dtype: str | jnp.dtype | torch.dtype) -> jnp.dtype:
+    if isinstance(dtype, str):
+        if dict_dtype := _VLLM_DTYPE_STR_TO_JAX_DTYPE.get(dtype, None):
+            return dict_dtype
+        return jnp.dtype(dtype)
+    elif isinstance(dtype, torch.dtype):
+        return t2j_dtype(dtype)
+    elif isinstance(dtype, jnp.dtype):
+        return dtype
+    elif isinstance(dtype, _ScalarMeta):
+        return dtype.dtype
+    else:
+        raise ValueError(f"Argument is unsupported data type {type(dtype)}")
+def to_torch_dtype(dtype: str | jnp.dtype | torch.dtype) -> torch.dtype:
+    # Use jax dtype as an intermediate dtype which we'll be used to convert it
+    # into torch dtype.
+    dtype = to_jax_dtype(dtype)
+    return j2t_dtype(dtype)
 _megacore = False
 logger = init_logger(__name__)
@@ -295,8 +317,8 @@ def get_jax_dtype_from_str_dtype(str_dtype: str) -> jnp.dtype:
     Returns:
         jnp.dtype: The JAX dtype.
     """
-    str_dtype = str_dtype.lower().strip()
-    return TPU_STR_DTYPE_TO_JAX_DTYPE.get(str_dtype)
+    # TODO(kyuyeunk): Replace all reference of this function into TpuDtype.
+    return to_jax_dtype(str_dtype)
 def time_function(func):

tpu_inference/worker/tpu_worker.py CHANGED Viewed

@@ -108,7 +108,7 @@ class TPUWorker:
         if self.model_config.trust_remote_code:
             # note: lazy import to avoid importing torch before initializing
-            from vllm.utils import init_cached_hf_modules
+            from vllm.utils.import_utils import init_cached_hf_modules
             init_cached_hf_modules()
@@ -357,7 +357,7 @@ class TPUWorker:
         if is_start:
             options = jax.profiler.ProfileOptions()
             # default: https://docs.jax.dev/en/latest/profiling.html#general-options
-            options.python_tracer_level = os.getenv("PYTHON_TRACER_LEVEL", 0)
+            options.python_tracer_level = envs.PYTHON_TRACER_LEVEL
             options.host_tracer_level = os.getenv("HOST_TRACER_LEVEL", 1)
             jax.profiler.start_trace(self.profile_dir,
                                      profiler_options=options)

{tpu_inference-0.11.1.dev202511270815.dist-info → tpu_inference-0.11.1.dev202512030818.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tpu_inference
-Version: 0.11.1.dev202511270815
+Version: 0.11.1.dev202512030818
 Author: tpu_inference Contributors
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers

{tpu_inference-0.11.1.dev202511270815.dist-info → tpu_inference-0.11.1.dev202512030818.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/test_base.py,sha256=Ct5WFRMHL7IHEIxk8FrzAvO8m0xFuDpzDBKkAKKAL2Q,7341
-tests/test_envs.py,sha256=Woyfp_d5HS-uTGo4_u9dYlBbgmhfIEoFb-Rx_k7YXD4,6298
+tests/test_envs.py,sha256=h502VxL2gvhECm8u5uDh5JTGvhFf_DfQO88SpqOFMzE,7135
 tests/test_quantization.py,sha256=IT5ASyS1uuWcxc22kRtBcA-V4j3Z3hb7pMztm3GOlBs,34445
 tests/test_tpu_info.py,sha256=ZrwlMsp8ffITkS_b8Q1t_QG-a-WVAd4NUcjHhGibcsI,4670
-tests/test_utils.py,sha256=Mta5ZzYCgRAh1-BjcOvvx9iQ9DnnXLps7oDHxVQp2yE,8236
+tests/test_utils.py,sha256=GIXLdd-x4gnqSLrySXGk22phqPc8MegFd7ph1Jj8OcU,8182
 tests/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/core/test_core_tpu.py,sha256=r496rk1eOsK_F4nvm9zprl_T-RcO6eCUb7LuVReOZno,21413
 tests/core/test_disagg_executor.py,sha256=QdE2YZs08EyDDCmSjhiXkXqQ9BJTgO6csr_E1xkkfSg,2256
@@ -26,10 +26,10 @@ tests/lora/test_lora.py,sha256=wJiF1P1BDnPN8TLX2tlFtdZ_QCkV-S9nPl6_uR6DqFc,4439
 tests/lora/utils.py,sha256=rY0tDZEZe58ye4-ykwrTnsiWuLcaEG57N_Rua90bDXI,2726
 tpu_inference/__init__.py,sha256=p4MaepRdN7723FUNE-3pOMxZWjFn4_TVFgjrNyty4JE,2304
 tpu_inference/env_override.py,sha256=pmL7lfs_rGCP92ya3wuWuudsCYeOMZ6tFZY82A4KkQc,365
-tpu_inference/envs.py,sha256=hoPuT0SyLCxqyZ0QJIha6EXSZv2TpACfmENuiT0iJMM,3956
+tpu_inference/envs.py,sha256=ugze6VdQ_hG1IxUCbcgXZq7a22fZ-Lora3V_fkFOefw,5714
 tpu_inference/logger.py,sha256=HQCz7NefmbturuhOC7-3Ixbtcdgoz4g9FHh2RB6o8cc,334
 tpu_inference/tpu_info.py,sha256=3iilHRQSFjwMJwhKcuuawTm7mhwkgHbj4zi6CiAySrs,2265
-tpu_inference/utils.py,sha256=Ddsx2CY2ARe46RZL27URzXCN3P6pMcKWB-APXUB8sHs,10098
+tpu_inference/utils.py,sha256=mHbjI8fxInPxagLsSUg-R3DzSz-X7WYNdoorPYoE3hg,10855
 tpu_inference/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tpu_inference/core/core_tpu.py,sha256=WDD3koE_j1QhWS2BbMA2aQOZayPZm4tYPvzL4YCX2jY,33294
 tpu_inference/core/disagg_executor.py,sha256=HZpgYMVxRxm0RQxO4l8IDYBWJ6Z3Tac6xavc5otcirc,4657
@@ -38,10 +38,10 @@ tpu_inference/core/sched/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
 tpu_inference/core/sched/dp_scheduler.py,sha256=mKs8Ms46szdlBfo8hjdqis2ZKAZbcKnHAGfEr0X5R8g,22527
 tpu_inference/distributed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tpu_inference/distributed/jax_parallel_state.py,sha256=5_xCwcL03lFPUoSO_OP7hIVKpUFroW1m-jVO7R6FbUc,2223
-tpu_inference/distributed/tpu_connector.py,sha256=w_gOI6hX7NWefaxN_9XH9TXReGElOyFifdDHpPswotM,29696
+tpu_inference/distributed/tpu_connector.py,sha256=kLaTwy6BrAThJeFkd1soJ47bBo5iGp4GjUJs7xFx4Tg,29696
 tpu_inference/distributed/utils.py,sha256=1KIREn28Zg10O-MSUkVQMRzS09WoGc_VLGOX4QTFJac,1504
 tpu_inference/executors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tpu_inference/executors/ray_distributed_executor.py,sha256=emYfSFJ3kluEmi6mlfnvxSUrC_mGVRVcjrUqUH2MR4g,16122
+tpu_inference/executors/ray_distributed_executor.py,sha256=9CnzWb8aurH1B0tJfMHB73F-RQBGqSf5DnymetBvZ5o,16225
 tpu_inference/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tpu_inference/experimental/llama3_jax_stashed.py,sha256=YK1oSIfto9ALo-HB45XfSrbq9XgVbE4m2C-9zRwmSzI,10913
 tpu_inference/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -68,7 +68,7 @@ tpu_inference/kernels/ragged_paged_attention/v2/ragged_kv_cache_update.py,sha256
 tpu_inference/kernels/ragged_paged_attention/v2/tuned_block_sizes.py,sha256=mw80bXBGenroGdrITV0F_EaI2s-Z9KWwqU9WodvJg14,97919
 tpu_inference/kernels/ragged_paged_attention/v3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tpu_inference/kernels/ragged_paged_attention/v3/kernel.py,sha256=O179Fft5KpuN5LIFx3SghWXJJUqh3Og-xqfO4Z8QXYU,57032
-tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py,sha256=X_d-SMGNc3zv396uQGL-73oLzp5ZQP8gaubMDebM_AY,57426
+tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py,sha256=ArwrqIQiKIop_jaDKAMw656YHQ3IFZ0sRu9Cgycrtko,59858
 tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes.py,sha256=k3LwduhZO85cJ-pSgnGN0c2Nn8eNeQq4eA94KUXJzMw,142198
 tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes_hd64.py,sha256=P3_ivi8iUz5QMU_3pgpl4Bkbmn0q0NpDtVJX39haRQA,11208
 tpu_inference/kernels/ragged_paged_attention/v3/util.py,sha256=1N_ozjKboDYLteFJndWoLXNudj2z53rGXMkELa5Z9tY,1102
@@ -78,7 +78,7 @@ tpu_inference/layers/common/attention_interface.py,sha256=SQZ-1I32Jqg7GGI-z4BVib
 tpu_inference/layers/common/attention_metadata.py,sha256=St8ZatbY1D7xQACKJH459jMgp3oTP3AQ36mi9FZdrPU,850
 tpu_inference/layers/common/binary_search.py,sha256=ZQi-z1wG6WTcfVQXeTGOZokX4K1DSf9kCzqfrhEU8lk,12320
 tpu_inference/layers/common/quant_methods.py,sha256=mQSxZ44-QQtm22C_8ViejnP1cP2Dv6yc2YaP6oMKJeQ,185
-tpu_inference/layers/common/sharding.py,sha256=KUPd5HxfmQZ01wc3lGEusI6QYHnZxFp7-Ur-0b8hOH8,25256
+tpu_inference/layers/common/sharding.py,sha256=sjbwkDr2fP26Ob8f5cSDeDifr3eWFZMDHU4MKr7pIgQ,25217
 tpu_inference/layers/jax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tpu_inference/layers/jax/base.py,sha256=Vhts6ZMwNCZ8LbnEXeB0rl3nHdS5hDJWX7HEa7Fl7yE,5775
 tpu_inference/layers/jax/constants.py,sha256=NcYg0zAf3ClfP7YMYdYu_F1GngOzZaIxIAHBZDunKw4,2755
@@ -108,7 +108,7 @@ tpu_inference/layers/vllm/sharding.py,sha256=as7CF8UKTF3ToymwRY5Pi8uzwJk0P1sHPkW
 tpu_inference/layers/vllm/quantization/__init__.py,sha256=SEppGayBzzQ5tsXLSy99aqilkAawQwYxnv2alCg6-ZU,1777
 tpu_inference/layers/vllm/quantization/awq.py,sha256=-8ZmjGvSKJB6_JuwSctNWt8xHWq4VSvK_AK9iahlgCo,8495
 tpu_inference/layers/vllm/quantization/common.py,sha256=8XD64pPa077c9HThFhLFVHlDL9YBafnYwp6rp6gR44E,4432
-tpu_inference/layers/vllm/quantization/mxfp4.py,sha256=3T3M0qLoW7GKdqbv_toMoQP39lV1qCoQ8Uc8l8aq1hg,14495
+tpu_inference/layers/vllm/quantization/mxfp4.py,sha256=o661uiSvLvWGr8hQMl7TqYXJyALPREtNWlKHAM9AUrw,14541
 tpu_inference/layers/vllm/quantization/unquantized.py,sha256=nSRBzVurTiQQkF9FuSTshfRwfxfzs54E2_4eK7Eyhj0,15345
 tpu_inference/layers/vllm/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py,sha256=6idEyy3e849fZ1UeNvc9eSHYX7e6qvohrJa_d_D9MBk,5285
@@ -121,7 +121,7 @@ tpu_inference/lora/torch_lora_ops.py,sha256=pr3N7DVfkn3ANijUC6dBoiCtIJW4fdJpKdC3
 tpu_inference/lora/torch_punica_tpu.py,sha256=qTnXZGLoOgvukSxeunO_SfpPTlkq9GlMj9H7zVYg9LE,12680
 tpu_inference/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tpu_inference/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tpu_inference/models/common/model_loader.py,sha256=_eFPCM0_ssjoVdj38rMLR-qnJ7iW_Ox_hc8JiWycxNs,19923
+tpu_inference/models/common/model_loader.py,sha256=b3aigca81gMVJt42oF2aoRohQHjBBe3oK3IPblZAaUM,19996
 tpu_inference/models/jax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tpu_inference/models/jax/deepseek_v3.py,sha256=SKOHVEC-_2NLxBnzBzbu5tu0d6FTlAEiI1EefGaO2QE,40047
 tpu_inference/models/jax/gpt_oss.py,sha256=Vw4LRB5Kp6hbA2hjZGFS8kiEqOCjf881XH2JNtu2S1I,20924
@@ -139,36 +139,36 @@ tpu_inference/models/jax/utils/multi_modal_utils.py,sha256=rrIrQWidkUnGilBHKNpdY
 tpu_inference/models/jax/utils/weight_utils.py,sha256=qFU53jPHPvIcs_EOdIH80oNojpUp7GdSY2E6NZNsjvM,21376
 tpu_inference/models/jax/utils/quantization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tpu_inference/models/jax/utils/quantization/mxfp4_utils.py,sha256=boGnqJCRIOf5nedAxQ8_IUTV6Rfll10DXnRC40BeeE8,3682
-tpu_inference/models/jax/utils/quantization/quantization_utils.py,sha256=xgKoKB7AM3TYPxzVgEGLTK9ebQH2Kx8mNuO0heovkmk,26778
+tpu_inference/models/jax/utils/quantization/quantization_utils.py,sha256=rzAFU3OtQvg8w8ow0V15rMljAsa4SBrwOye6OI8Bty4,26530
 tpu_inference/models/jax/utils/quantization/configs/fp8_all_modules_w_only.yaml,sha256=d_YHPtaRJ_7PBrPijSzJGnVeoJO62tKIGqrgFqpYT1k,137
 tpu_inference/models/jax/utils/quantization/configs/fp8_default.yaml,sha256=b7SyL75HuSTj3fN9_ZLCK_CDiccL5DGq_DddGmxj_qk,170
 tpu_inference/models/jax/utils/quantization/configs/int8_all_modules_w_only.yaml,sha256=0Qwij71zj9k6rmrUNd8Q5df9YYfkoJ1ZkgMAHxQy81k,128
 tpu_inference/models/jax/utils/quantization/configs/int8_default.yaml,sha256=lGec0UwwxmNPNgKPSsTsCMSXNJjhw507KMtM2NsSCMw,152
 tpu_inference/models/vllm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=Cfsd0PjuR-hCoiCwPVdzjkE6AmHLYY1JQyBERyFkl-E,12344
+tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=3EcaD_1vZuyAZBfDtm5u_qfCahQU28qR4rAUraNAFqs,12305
 tpu_inference/models/vllm/vllm_model_wrapper_context.py,sha256=yxlJHPmRQIAwlb1MmHK3xfXokgIkJ-evNU4PgyoJUdg,1187
 tpu_inference/platforms/__init__.py,sha256=lQCrKddS_GcGpCbeogvz9zOZD1mQw5bBsiw8On46qFQ,74
-tpu_inference/platforms/tpu_platform.py,sha256=W_19FvlFxPs0V0vcr3NI6oVBG-eA3eBV2-H0Cr3Kyco,10879
+tpu_inference/platforms/tpu_platform.py,sha256=F4jjPEFHFUTxdfWZYTBuUVJt6SYTFeWEKmrl74sX-Zk,10663
 tpu_inference/runner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tpu_inference/runner/block_table.py,sha256=K3Ic8EgPM08d_C5nEN60mxoRydlaQWySAemf_8Q_qVw,4175
-tpu_inference/runner/compilation_manager.py,sha256=DR5TkHGin2QbRIWZlkkD5sUdxonTgr35pMYyrSwGk_U,37585
+tpu_inference/runner/compilation_manager.py,sha256=dU0Yk8f0LtRTBe2q0iB3xcMSRco_WPsj2wS6zZJ8WhY,40375
 tpu_inference/runner/input_batch.py,sha256=bx221NX2IOWzrtopss-B-2ZKW4y-U6nQpG09PjpUziw,18273
 tpu_inference/runner/kv_cache.py,sha256=F4dzW2d53xuxkFUn0oKzwE6VklGUeVm-QM19NVfIQDU,4577
-tpu_inference/runner/kv_cache_manager.py,sha256=2iwEc1vXt-p8kkKByvlqy4IKi5bOqFpOlrq0QmHHnQA,22450
+tpu_inference/runner/kv_cache_manager.py,sha256=N0a896CE7Zrs_d4ZSSzRdqgjV1It57RBDSIpOzkRqro,22013
 tpu_inference/runner/lora_utils.py,sha256=B4xMCgXGJ4VNdePvn89HH3tIZ-gYsQ7Vq_YCiYIATEY,3843
 tpu_inference/runner/multimodal_manager.py,sha256=azEPdHOwz8CN11MQmorGdtrCLbFaTCxdWyuEsZTzjYM,9778
-tpu_inference/runner/persistent_batch_manager.py,sha256=KERSfKy6XjMejnbtPGI3hzoYAHJLeCxmpZVYPqBCago,11156
+tpu_inference/runner/persistent_batch_manager.py,sha256=Otu67vOTf1_HKAMZgPDDHlRvvZ3YVJdz-QderH4qOII,13263
 tpu_inference/runner/speculative_decoding_manager.py,sha256=I3FDWKh2dn6nV8LgTGfCTwMKYnxQsTPpBIrmaJngXHs,10215
 tpu_inference/runner/structured_decoding_manager.py,sha256=gZQKQUFxh6xYYH9eGTdbguqk8hc2WwTrIdMMuCcbymE,3573
-tpu_inference/runner/tpu_runner.py,sha256=A5Ed4NL6CPNv7o7u6zqmdPbmmPyiIxFcwWlJ0E5_fpU,77991
-tpu_inference/runner/utils.py,sha256=ZnWUoNo-7INeB0mdXti1jwUOdbmxyExznOs-crRTQLk,17126
+tpu_inference/runner/tpu_runner.py,sha256=NBDKfSGShHmYpudrtGfo1hnVSQTcLpZV_nPiXEo7JPQ,79439
+tpu_inference/runner/utils.py,sha256=lKqL5nxGTk7ufzJRNdp4udn2bPu3jIX52W7akXgSrHc,17133
 tpu_inference/spec_decode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tpu_inference/spec_decode/jax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tpu_inference/spec_decode/jax/eagle3.py,sha256=ci-yPOSlAfsuwoR_QAGrywtDLMbicjOhl787o9MahYg,17376
+tpu_inference/spec_decode/jax/eagle3.py,sha256=FxP0uWeQlHlgCpt1nY3FUd4lKlegKJljHyc05jJucaQ,19104
 tpu_inference/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tpu_inference/worker/tpu_worker.py,sha256=4QH83MzYCnubwWXTvPEc2BmiU2R5KILci6PawDNpnHM,20670
-tpu_inference-0.11.1.dev202511270815.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-tpu_inference-0.11.1.dev202511270815.dist-info/METADATA,sha256=nAfRlJUVGJkVnroEwrw0EsiO9CqWJLrGgHkt5AORBJk,5517
-tpu_inference-0.11.1.dev202511270815.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-tpu_inference-0.11.1.dev202511270815.dist-info/top_level.txt,sha256=gb1hRIQ3DOawUfVzvPL2E__2KPIl9I0vb5r0xcRBGYQ,20
-tpu_inference-0.11.1.dev202511270815.dist-info/RECORD,,
+tpu_inference/worker/tpu_worker.py,sha256=LnZcSNxdhh0NkoWXxS5bZ0bsTMduSANehy2wELAaVsY,20672
+tpu_inference-0.11.1.dev202512030818.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+tpu_inference-0.11.1.dev202512030818.dist-info/METADATA,sha256=oLzYFTCTvHDQLfyWoc8qV4IMYCoLRTiHECf08oT_bFA,5517
+tpu_inference-0.11.1.dev202512030818.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+tpu_inference-0.11.1.dev202512030818.dist-info/top_level.txt,sha256=gb1hRIQ3DOawUfVzvPL2E__2KPIl9I0vb5r0xcRBGYQ,20
+tpu_inference-0.11.1.dev202512030818.dist-info/RECORD,,

{tpu_inference-0.11.1.dev202511270815.dist-info → tpu_inference-0.11.1.dev202512030818.dist-info}/WHEEL RENAMED Viewed

File without changes

{tpu_inference-0.11.1.dev202511270815.dist-info → tpu_inference-0.11.1.dev202512030818.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{tpu_inference-0.11.1.dev202511270815.dist-info → tpu_inference-0.11.1.dev202512030818.dist-info}/top_level.txt RENAMED Viewed

File without changes

tpu-inference 0.11.1.dev202511270815__py3-none-any.whl → 0.11.1.dev202512030818__py3-none-any.whl

Potentially problematic release.

tpu-inference 0.11.1.dev202511270815__py3-none-any.whl → 0.11.1.dev202512030818__py3-none-any.whl