tpu-inference 0.12.0.dev20251222__py3-none-any.whl → 0.12.0.dev20251224__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. tests/core/test_dp_scheduler.py +128 -71
  2. tests/e2e/test_data_parallel.py +176 -280
  3. tests/e2e/test_hybrid_kvcache.py +219 -0
  4. tests/e2e/test_speculative_decoding.py +26 -6
  5. tests/layers/jax/test_qwix.py +1 -1
  6. tests/layers/vllm/test_compressed_tensors_w8a8_fp8.py +36 -21
  7. tests/layers/vllm/test_compressed_tensors_w8a8_int8.py +36 -21
  8. tests/layers/vllm/test_mxfp4.py +25 -10
  9. tests/layers/vllm/test_unquantized.py +61 -31
  10. tests/layers/vllm/utils.py +19 -4
  11. tests/models/common/test_model_loader.py +2 -2
  12. tests/models/jax/test_qwen2_5_vl.py +10 -11
  13. tests/runner/test_multimodal_manager.py +3 -3
  14. tests/runner/test_tpu_runner.py +67 -8
  15. tests/runner/test_tpu_runner_dp.py +66 -0
  16. tpu_inference/core/sched/dp_scheduler.py +65 -40
  17. tpu_inference/kernels/mla/v1/kernel.py +7 -26
  18. tpu_inference/layers/common/sharding.py +8 -3
  19. tpu_inference/layers/jax/attention/deepseek_v3_attention.py +3 -3
  20. tpu_inference/layers/jax/attention/gpt_oss_attention.py +3 -3
  21. tpu_inference/layers/jax/attention/llama4_attention.py +3 -4
  22. tpu_inference/layers/jax/sample/sampling.py +1 -1
  23. tpu_inference/layers/vllm/fused_moe.py +51 -47
  24. tpu_inference/layers/vllm/quantization/common.py +14 -13
  25. tpu_inference/layers/vllm/quantization/mxfp4.py +21 -7
  26. tpu_inference/layers/vllm/quantization/unquantized.py +19 -7
  27. tpu_inference/layers/vllm/sharding.py +7 -4
  28. tpu_inference/models/common/model_loader.py +11 -14
  29. tpu_inference/models/jax/llama3.py +13 -10
  30. tpu_inference/models/jax/llama_guard_4.py +1 -1
  31. tpu_inference/models/jax/qwen2.py +3 -2
  32. tpu_inference/models/jax/qwen2_5_vl.py +4 -4
  33. tpu_inference/models/jax/utils/multi_modal_utils.py +4 -4
  34. tpu_inference/models/jax/utils/qwix/qwix_utils.py +3 -3
  35. tpu_inference/models/vllm/vllm_model_wrapper.py +5 -2
  36. tpu_inference/platforms/tpu_platform.py +7 -7
  37. tpu_inference/runner/compilation_manager.py +43 -33
  38. tpu_inference/runner/kv_cache_manager.py +1 -2
  39. tpu_inference/runner/multimodal_manager.py +1 -1
  40. tpu_inference/runner/tpu_runner.py +12 -9
  41. tpu_inference/utils.py +31 -30
  42. tpu_inference/worker/tpu_worker.py +5 -2
  43. {tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/METADATA +1 -1
  44. {tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/RECORD +47 -46
  45. {tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/WHEEL +0 -0
  46. {tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/licenses/LICENSE +0 -0
  47. {tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import functools
16
- from typing import TYPE_CHECKING, Dict, List
16
+ from typing import TYPE_CHECKING, List
17
17
 
18
18
  import jax
19
19
  import jax.numpy as jnp
@@ -212,7 +212,6 @@ class KVCacheManager:
212
212
  # uniform page size.
213
213
  representative_spec = kv_cache_config.kv_cache_groups[0].kv_cache_spec
214
214
  page_size_bytes = representative_spec.page_size_bytes
215
- self.runner.layer_name_to_kvcache_index: Dict[str, int] = {}
216
215
  kv_caches = self.runner.kv_caches
217
216
  num_blocks_list = []
218
217
  for i, kv_cache_tensor in enumerate(kv_cache_config.kv_cache_tensors):
@@ -148,7 +148,7 @@ class MultiModalManager:
148
148
  # 2. A list or tuple (length: num_items) of tensors, each of shape
149
149
  # (feature_size, hidden_size) in case the feature size is dynamic
150
150
  # depending on the input multimodal items.
151
- curr_group_outputs = self.runner.get_multimodal_embeddings_fn(
151
+ curr_group_outputs = self.runner.embed_multimodal_fn(
152
152
  self.runner.state, image_grid_thw, **batched_mm_inputs)
153
153
 
154
154
  sanity_check_mm_encoder_outputs(
@@ -282,6 +282,9 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
282
282
  self._substitute_placeholder_token_fn = _substitute_placeholder_token
283
283
  self.execute_model_state: ExecuteModelState | None = None
284
284
 
285
+ self.kv_caches: list[jax.Array] = []
286
+ self.layer_name_to_kvcache_index: dict[str, int] = {}
287
+
285
288
  def _init_random(self):
286
289
  if self.model_config.seed is None:
287
290
  self.model_config.seed = 0
@@ -508,10 +511,10 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
508
511
  multimodal_fns = multimodal_fns or {}
509
512
  self.precompile_vision_encoder_fn = multimodal_fns.get(
510
513
  "precompile_vision_encoder_fn", None)
511
- self.get_multimodal_embeddings_fn = multimodal_fns.get(
512
- "get_multimodal_embeddings_fn", None)
513
- self.get_input_embeddings_fn = multimodal_fns.get(
514
- "get_input_embeddings_fn", None)
514
+ self.embed_multimodal_fn = multimodal_fns.get("embed_multimodal_fn",
515
+ None)
516
+ self.embed_input_ids_fn = multimodal_fns.get("embed_input_ids_fn",
517
+ None)
515
518
  self.get_mrope_input_positions_fn = multimodal_fns.get(
516
519
  "get_mrope_input_positions_fn", None)
517
520
 
@@ -523,7 +526,7 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
523
526
  jax.random.key(self.model_config.seed)).params()
524
527
  self.is_multimodal_model = (
525
528
  self.model_config.is_multimodal_model
526
- and self.get_multimodal_embeddings_fn is not None and hasattr(
529
+ and self.embed_multimodal_fn is not None and hasattr(
527
530
  self.model_config.hf_config, "architectures"
528
531
  ) #TODO: Remove Llama Guard 4 specific condition once the LG4 Vision portion is implemented
529
532
  and len(self.model_config.hf_config.architectures) >= 1
@@ -545,7 +548,6 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
545
548
  self.topology_order_id = topology_order_id
546
549
  self.kv_cache_config = kv_cache_config
547
550
  self.use_hybrid_kvcache = len(kv_cache_config.kv_cache_groups) > 1
548
- self.kv_caches = []
549
551
  self.kv_cache_manager.initialize_kv_cache(kv_cache_config)
550
552
  if has_kv_transfer_group():
551
553
  get_kv_transfer_group().register_runner(self)
@@ -827,7 +829,7 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
827
829
  sharding = None
828
830
  if self.dp_size > 1:
829
831
  sharding = NamedSharding(self.mesh,
830
- PartitionSpec(ShardingAxisName.ATTN_DATA))
832
+ PartitionSpec(ShardingAxisName.MLP_DATA))
831
833
 
832
834
  tpu_sampling_metadata = TPUSupportedSamplingMetadata.from_input_batch(
833
835
  self.mesh, self.input_batch, padded_num_reqs, sharding=sharding)
@@ -1390,7 +1392,8 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
1390
1392
  self.mesh,
1391
1393
  self.input_batch,
1392
1394
  padded_num_reqs,
1393
- sharding=data_parallel_attn_sharding,
1395
+ sharding=NamedSharding(self.mesh,
1396
+ PartitionSpec(ShardingAxisName.MLP_DATA)),
1394
1397
  )
1395
1398
  if self.uses_mrope:
1396
1399
  positions = mrope_positions
@@ -1680,7 +1683,7 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
1680
1683
  def _get_input_ids_embeds(self, input_ids: jax.Array,
1681
1684
  mm_embeds: list[jax.Array]):
1682
1685
  if self.is_multimodal_model:
1683
- inputs_embeds = self.get_input_embeddings_fn(
1686
+ inputs_embeds = self.embed_input_ids_fn(
1684
1687
  self.state,
1685
1688
  input_ids,
1686
1689
  mm_embeds,
tpu_inference/utils.py CHANGED
@@ -3,7 +3,7 @@ import time
3
3
  from collections import defaultdict
4
4
  from collections.abc import Sequence
5
5
  from functools import wraps
6
- from typing import Any, Callable, List, Tuple
6
+ from typing import Any, Callable, List, Tuple, Union
7
7
 
8
8
  import jax
9
9
  import jax.numpy as jnp
@@ -283,35 +283,6 @@ def get_hash_fn_by_name(hash_fn_name: str) -> Callable[[Any], bytes]:
283
283
  return utils.hashing.get_hash_fn_by_name(hash_fn_name)
284
284
 
285
285
 
286
- def quantize_kv(key: jax.Array, value: jax.Array,
287
- kv_cache_quantized_dtype: jnp.dtype, k_scale: float,
288
- v_scale: float) -> Tuple[jax.Array, jax.Array]:
289
- """
290
- Quantize the key and value tensors.
291
-
292
- Args:
293
- key: The key tensor to quantize.
294
- value: The value tensor to quantize.
295
- kv_cache_quantized_dtype: The dtype to quantize the key and value tensors to.
296
- q_scale: The scale to quantize the key and value tensors by.
297
- k_scale: The scale to quantize the key tensor by.
298
- v_scale: The scale to quantize the value tensor by.
299
-
300
- Returns:
301
- Tuple[jax.Array, jax.Array]: The quantized key and value tensors.
302
- """
303
- dtype_info = jnp.finfo(kv_cache_quantized_dtype)
304
- minval, maxval = float(dtype_info.min), float(dtype_info.max)
305
- key = key.astype(jnp.float32) / k_scale
306
- key = jnp.clip(key, minval, maxval)
307
- key = key.astype(kv_cache_quantized_dtype)
308
- value = value.astype(jnp.float32) / v_scale
309
- value = jnp.clip(value, minval, maxval)
310
- value = value.astype(kv_cache_quantized_dtype)
311
-
312
- return key, value
313
-
314
-
315
286
  def get_jax_dtype_from_str_dtype(str_dtype: str) -> jnp.dtype:
316
287
  """
317
288
  Get the JAX dtype from a string dtype.
@@ -326,6 +297,36 @@ def get_jax_dtype_from_str_dtype(str_dtype: str) -> jnp.dtype:
326
297
  return to_jax_dtype(str_dtype)
327
298
 
328
299
 
300
+ def get_mesh_shape_product(
301
+ mesh: Mesh,
302
+ axes: Union[str, list[str], None],
303
+ ) -> int:
304
+ """
305
+ Get the product of mesh dimensions for one or more axes.
306
+
307
+ Examples:
308
+ # Single axis (defaults to 1 if not present)
309
+ get_mesh_shape_product(mesh, "model")
310
+
311
+ # Multiple axes - computes product of their sizes
312
+ get_mesh_shape_product(mesh, ["model", "attn_dp"])
313
+
314
+ # None means no sharding on this dimension
315
+ get_mesh_shape_product(mesh, None) # returns 1
316
+ """
317
+ if axes is None:
318
+ return 1
319
+
320
+ if isinstance(axes, str):
321
+ axes = [axes]
322
+
323
+ product = 1
324
+ for axis in axes:
325
+ product *= mesh.shape.get(axis, 1)
326
+
327
+ return product
328
+
329
+
329
330
  def time_function(func):
330
331
  """
331
332
  A decorator to measure the execution time of a function.
@@ -431,8 +431,11 @@ class TPUWorker:
431
431
  ) -> None:
432
432
  """Allocate GPU KV cache with the specified kv_cache_config."""
433
433
  # Precompile functions with large vocab_size tensors before allocating KV cache to avoid OOM
434
- self.model_runner.compilation_manager._precompile_sampling()
435
- self.model_runner.compilation_manager._precompile_gather_logprobs()
434
+ if not (envs.SKIP_JAX_PRECOMPILE or
435
+ (hasattr(self.model_runner.model_config, "enforce_eager")
436
+ and self.model_runner.model_config.enforce_eager)):
437
+ self.model_runner.compilation_manager._precompile_sampling()
438
+ self.model_runner.compilation_manager._precompile_gather_logprobs()
436
439
  self.model_runner.initialize_kv_cache(kv_cache_config,
437
440
  self.topology_order_id)
438
441
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tpu_inference
3
- Version: 0.12.0.dev20251222
3
+ Version: 0.12.0.dev20251224
4
4
  Author: tpu_inference Contributors
5
5
  Classifier: Development Status :: 3 - Alpha
6
6
  Classifier: Intended Audience :: Developers
@@ -7,21 +7,22 @@ tests/core/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
7
7
  tests/core/test_core_tpu.py,sha256=r496rk1eOsK_F4nvm9zprl_T-RcO6eCUb7LuVReOZno,21413
8
8
  tests/core/test_disagg_executor.py,sha256=QdE2YZs08EyDDCmSjhiXkXqQ9BJTgO6csr_E1xkkfSg,2256
9
9
  tests/core/test_disagg_utils.py,sha256=A5icdqkJlau2PHYAxHfHKuqrlEKXVJu2nm02XOrXjcc,2530
10
- tests/core/test_dp_scheduler.py,sha256=H2IMS3FhG2hdYhUhdLjIfcVswLOm7rUW5EMsSP9mMc8,32197
10
+ tests/core/test_dp_scheduler.py,sha256=m6ph_OH9tXz6AxNde8cIjptd1lwDVSCqIV2Ef-cNJFk,34253
11
11
  tests/core/test_init.py,sha256=5BDDC-dmDtWEGaBPjQSiYJuMiwTBVRSDx9p7Cv8DKyI,2262
12
12
  tests/distributed/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
13
13
  tests/distributed/test_distributed_utils.py,sha256=YXKbSG9J72vCrU5mPiFf1ya-Yzc1BjeahdBmQVez8Wc,5031
14
14
  tests/distributed/test_tpu_connector.py,sha256=ajKeRUi3x29hQXfLrSlo6yDczpwZsg_mGt2vKBGRZdk,20538
15
15
  tests/e2e/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
16
16
  tests/e2e/test_async_scheduler.py,sha256=215xGuyTEBSOe-c1l48TIjrCqhbVFZY3m5p3q5mU7jA,6905
17
- tests/e2e/test_data_parallel.py,sha256=yiBgGgA4SSmEF_agKKy4sexBatLfNS_Z9uma6fC0dBE,14403
17
+ tests/e2e/test_data_parallel.py,sha256=KB-_BKic_iZyn4WbPWsUdVClinzd8g7PrQ0ui5B-nwo,10725
18
+ tests/e2e/test_hybrid_kvcache.py,sha256=Y7a-grjvAKBbp7vbQncVEQKGM1WxcwO0qa2o0opKiEI,8076
18
19
  tests/e2e/test_local_disagg.py,sha256=xIjYI6RGA6bZk4dluklhfYBoJGbHkrSihSkJtPgpZv4,10434
19
20
  tests/e2e/test_model_loader.py,sha256=DYlS420KXkNzeIijAf-0UQsYH0pOAGcXRl6P99PBiAc,9366
20
21
  tests/e2e/test_multi_modal_inference.py,sha256=hVatj8Rra6XAekp6zBxRivQUcGiV8SimPph9cZ-TJyk,3896
21
22
  tests/e2e/test_pipeline_parallel.py,sha256=VpxY9wgQj3-i0XooHZHdmHGdMS3ilmHbxu6ZfyQDUP0,9519
22
23
  tests/e2e/test_runai_model_streamer_loader.py,sha256=MXUxKfKV7vVM_LI7-5hBV-wCswogPENkMPsREUjFu3I,3790
23
24
  tests/e2e/test_sampling_params.py,sha256=ibLWtJfS35HughdOBtXD2IcyWPXoZA4R4KwXz-RzgOY,10683
24
- tests/e2e/test_speculative_decoding.py,sha256=XJo3kgXdEGKOYzloqq5b8JB53ECfSxuI-6usvE4ZLho,9158
25
+ tests/e2e/test_speculative_decoding.py,sha256=tj3VSJEi7r9aHjywZanlmfY4eS5Tfr5zPe9TH3PW5EY,9911
25
26
  tests/e2e/test_structured_decoding.py,sha256=QYh9WjGrzm7syeLrGUawA6cOkWlQqVpTn7W6qwt65NY,1863
26
27
  tests/executors/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
27
28
  tests/executors/test_ray_distributed_executor.py,sha256=rMazBfirGsehEUXgpIPJkw0z7xO4cnK2kzcgxjFA6Bo,8435
@@ -44,7 +45,7 @@ tests/layers/common/test_attention_interface.py,sha256=ke6h-e8CP-FhNY_ojKCYwyHgY
44
45
  tests/layers/common/test_quantization.py,sha256=JcwDrNTm6UlBSV3s3mwwvpxOjqBpZDJwnYYoj3DnS7A,5344
45
46
  tests/layers/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
46
47
  tests/layers/jax/test_layers.py,sha256=L1xh_wniBtlfudya_WRmHUWOhEno0i6ikKE1XiBtaZs,5010
47
- tests/layers/jax/test_qwix.py,sha256=G7PrmkWkhQD8P0RvwnF-iyRoXO4d7g1Ce4ycaIjDQ_0,39727
48
+ tests/layers/jax/test_qwix.py,sha256=V8MpFKJb5_evs-Z4WeZ5SxA-KAyFD6Qrex7ExywLxmE,39744
48
49
  tests/layers/jax/test_rope.py,sha256=0biwYRSRsKMaRHknc8v8Tfrt0bmJKQGeQLPqR_D04mM,3565
49
50
  tests/layers/jax/test_sharding.py,sha256=Hk1MWhIluOKIBx7-O9fKa1n6fF3SW7UMYsRI9AGzp_0,5914
50
51
  tests/layers/jax/test_transformer_block.py,sha256=Wpgowc0ZJnv1GUxcK-Op6CCYWjpqgUM0p3EANk-YWzc,5742
@@ -62,12 +63,12 @@ tests/layers/vllm/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs
62
63
  tests/layers/vllm/test_attention.py,sha256=NSbeKIi4eQj9RLiHeT-aEDvvsiHYbD3rk4uXq3_5_X8,13193
63
64
  tests/layers/vllm/test_awq.py,sha256=0aFURqn3zh0Ueytvfzy6SGon0gPRzk8Dn0DuCnpu_XQ,14479
64
65
  tests/layers/vllm/test_compressed_tensors_moe.py,sha256=jlMZcbQWlgaLX4pAlEMjZbJ7a0NyjxIhqXUW5DGH6KM,7385
65
- tests/layers/vllm/test_compressed_tensors_w8a8_fp8.py,sha256=dkIBrnvOJkmTtoakiuIs66zpeoUSzZNL0i4eoZzdMRM,14347
66
- tests/layers/vllm/test_compressed_tensors_w8a8_int8.py,sha256=9fRxMVjzrrHCwKsLyShhk5AfnWWE-h2sdjCAjyRyqAE,15381
66
+ tests/layers/vllm/test_compressed_tensors_w8a8_fp8.py,sha256=eTXSWaPcKgOEAyWt7Xqqkhd0fa2J3QFa3QJBRQjTsaY,15521
67
+ tests/layers/vllm/test_compressed_tensors_w8a8_int8.py,sha256=dCKHPos33MIwJnK96zlIUvJYN0PpRPXdyPyo-PnKH3U,16555
67
68
  tests/layers/vllm/test_fp8.py,sha256=ZvFTg4Umgg6W2RwElkIZ_Rls_XZJ8sEW7yww2K3ztf4,666
68
- tests/layers/vllm/test_mxfp4.py,sha256=CTxh0h0iG17Rvk4eM62sTGcNG-1rPp7FNomt_9zMW5o,10828
69
- tests/layers/vllm/test_unquantized.py,sha256=iP5qWKCiqkb4BIjuZA_PepCxMrXJv9rBZ9y5vxbWZ6M,22446
70
- tests/layers/vllm/utils.py,sha256=bozp79XO3ofy1I6aIwjEtfo9vmKoOGYIC9YN1ba4sIA,2502
69
+ tests/layers/vllm/test_mxfp4.py,sha256=sFer788F7pbDUtB0yB6WU9Lh9kzUOaxXP6XXVsvQHrc,11625
70
+ tests/layers/vllm/test_unquantized.py,sha256=iqoqib_Rv2DdmKA2ub6T6cIT67PSTc3s7gpYzBTs_qI,24432
71
+ tests/layers/vllm/utils.py,sha256=Qk67IqSrSovhPlWmDGFBr5vwgwtG7kcUzy69-oPgR0A,3105
71
72
  tests/lora/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
72
73
  tests/lora/conftest.py,sha256=OI4gPV4vNOCcfE93ccmIWQHd8-Gp9c2yGVlaSnuT4Tg,1559
73
74
  tests/lora/test_bgmv.py,sha256=B1HCjh27379vCxZsd8nKMBZ8lr1JamuuWDgYiALyn18,1934
@@ -77,7 +78,7 @@ tests/lora/test_lora_perf.py,sha256=zcZud9Hexx6wa9qX0IvnjKyDD-i61NdIQrVO31Yx3vU,
77
78
  tests/lora/utils.py,sha256=rY0tDZEZe58ye4-ykwrTnsiWuLcaEG57N_Rua90bDXI,2726
78
79
  tests/models/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
79
80
  tests/models/common/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
80
- tests/models/common/test_model_loader.py,sha256=78JHAh7zoCccYGwSr6Y2uXAOLY3qircICDszj4sRuic,17402
81
+ tests/models/common/test_model_loader.py,sha256=Sf-k_Kxdjkz-lS_0-ICfA4Yk2VXX33esP8PNG4B7FzA,17392
81
82
  tests/models/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
82
83
  tests/models/jax/test_deepseek_v3.py,sha256=9RY6ypfvPts3NOnvWu9n_T7pUjrvj_QY_saLOKpFg4c,16243
83
84
  tests/models/jax/test_llama3.py,sha256=NYsT35yh9GzkYYcLcOo1BkBGGr14E89GtdCJJ6SFhI8,6610
@@ -85,7 +86,7 @@ tests/models/jax/test_llama4.py,sha256=MMQzTymnVUdWZ6XoOD8k9Q2ikmAk6tFSGB1C5DCi7
85
86
  tests/models/jax/test_llama_eagle3.py,sha256=DCk1ae9SLJUrqyx7uvNOmpqAAM09xb0rYNOst-Leo_M,7777
86
87
  tests/models/jax/test_llama_guard_4.py,sha256=w-8cKwuTRFyzDh2mxvAofrt5xUprZyqRm5DRVRamGwE,9322
87
88
  tests/models/jax/test_qwen2.py,sha256=xylG-LmHBSy76V-Yl5KiAXogpZPM2w3Mx0E61Ud5sO4,6227
88
- tests/models/jax/test_qwen2_5_vl.py,sha256=Wy9rlizi0t9Afks-wBWOYbNRnAgvvOOXgoRMiYxjzqE,26339
89
+ tests/models/jax/test_qwen2_5_vl.py,sha256=PfB_gecAvXNrksxt8E56yP6d8ioZZWMoUIvh-OrbzJ4,26299
89
90
  tests/models/jax/test_qwen3.py,sha256=NWLAZPwGIhZjW0OADk4JqU4ZPn8JGSGPwkbTQvKEc50,6021
90
91
  tests/models/jax/test_weight_loading.py,sha256=RlmByQcjrsefybeNlS9wnL522be6CSR7YLcb7O5eZ-A,5205
91
92
  tests/models/jax/utils/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
@@ -97,12 +98,12 @@ tests/runner/test_block_table.py,sha256=gFGF425mpWfOLjnQeQiG18TqFko8vpilJ3AiiiV1
97
98
  tests/runner/test_input_batch.py,sha256=7nEkB00JrhaKCKf1ep28iedYbNbuqEdaQAxYqHaXThc,8198
98
99
  tests/runner/test_kv_cache.py,sha256=TvxmJNI8lM0ZNllZonHySA8NCQZ7prBgNODpYEI787E,7394
99
100
  tests/runner/test_kv_cache_manager.py,sha256=dYVWQamfGwqytnumfvjRt2r3n9BRBqcSbCXGWnw1SXs,22461
100
- tests/runner/test_multimodal_manager.py,sha256=EmQ_SsfdD7xroKNsN-q2VEs8fLkeQEWLCNxr9fxOnGc,18618
101
+ tests/runner/test_multimodal_manager.py,sha256=8RbHHMvRuHg1Scc0b70tsr-tF2lfk8SZVx3InVgIryc,18591
101
102
  tests/runner/test_persistent_batch_manager.py,sha256=EW6P-BtI4i59Clx-Lh84fU1GtDKF3Av2gtO-rCRYN_k,3148
102
103
  tests/runner/test_speculative_decoding_manager.py,sha256=HgemtiBL_VhBheUgem3OpPj6yBK9vdJsL8VCABQdGXw,16093
103
104
  tests/runner/test_structured_decoding_manager.py,sha256=pVX3z2TLR6SfBoEyRtv0BPajHbMVdcOAe4opMoxEpps,9802
104
- tests/runner/test_tpu_runner.py,sha256=zR36Jjrb1cOygQnbJ6LlpoWzb8Ix76fHs7jTWH2GVwE,9004
105
- tests/runner/test_tpu_runner_dp.py,sha256=eGC_pUQzivCjCEeLtI4ZruMY2Qg34Oj6dHhtUKvtyXY,47901
105
+ tests/runner/test_tpu_runner.py,sha256=H1RjGGvNPfNNhglbiUs9J2QsokXaDtnmmtdoYRvA5_8,11649
106
+ tests/runner/test_tpu_runner_dp.py,sha256=TAEmI-JaIodgYNjjjQAAQg-q0bSbeVON5ZZE2jngfOk,50851
106
107
  tests/runner/test_tpu_runner_mesh.py,sha256=kDyjdnd0vO4GQrcOAPLr9TEYA49-qDFE4gHt9IL6wlk,8638
107
108
  tests/runner/test_utils.py,sha256=_R2bnKttqgg7vfPXP0Qfx38mr-4UBm2UMIbuQFAwgWk,15442
108
109
  tests/spec_decode/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
@@ -114,13 +115,13 @@ tpu_inference/env_override.py,sha256=pmL7lfs_rGCP92ya3wuWuudsCYeOMZ6tFZY82A4KkQc
114
115
  tpu_inference/envs.py,sha256=A1Bdm5qiXhTdu-Q_yNzBpi79_nOJIDbdFF7MAMqmjxo,6662
115
116
  tpu_inference/logger.py,sha256=HQCz7NefmbturuhOC7-3Ixbtcdgoz4g9FHh2RB6o8cc,334
116
117
  tpu_inference/tpu_info.py,sha256=lty-ngN1uUvQLlFGkWa2u5eEb5anwmcv_uyI0S95PdY,2840
117
- tpu_inference/utils.py,sha256=Gx9AKphXvY5ltfXL5DNKEH-I7LN6V4ZIv7cqTgxMtaI,11088
118
+ tpu_inference/utils.py,sha256=0fQXcZJ4IiPGlNv_bLdkla5FeEEKEzyTsSDH-y47ouo,10641
118
119
  tpu_inference/core/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
119
120
  tpu_inference/core/core_tpu.py,sha256=WDD3koE_j1QhWS2BbMA2aQOZayPZm4tYPvzL4YCX2jY,33294
120
121
  tpu_inference/core/disagg_executor.py,sha256=HZpgYMVxRxm0RQxO4l8IDYBWJ6Z3Tac6xavc5otcirc,4657
121
122
  tpu_inference/core/disagg_utils.py,sha256=lv8MAVoAjtcmTaenUXVokg2q3d0tzsma86UiQlQ3omY,1492
122
123
  tpu_inference/core/sched/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
123
- tpu_inference/core/sched/dp_scheduler.py,sha256=b55aIN6EzRpKD5inItO1klHmhRoq0gb1fdnV01N3Nbw,33345
124
+ tpu_inference/core/sched/dp_scheduler.py,sha256=-7d2zopJ5ZJFIJ8LbHsm_4bBBtP7qrim4XWVPDF6vrg,34960
124
125
  tpu_inference/distributed/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
125
126
  tpu_inference/distributed/jax_parallel_state.py,sha256=xMK0tEtblh37_LoHvp1-6qPI8AgX4HkE0ATuc7fdHKs,2798
126
127
  tpu_inference/distributed/tpu_connector.py,sha256=3rR0y2P1MOOSM8nBfvl95ZQcVKMms3rL8zTdnxUmSms,29946
@@ -144,7 +145,7 @@ tpu_inference/kernels/megablox/common.py,sha256=CoJPNom6anJU9B4i05d2skytJEvNS994
144
145
  tpu_inference/kernels/megablox/gmm.py,sha256=rVW70SGPshR9XvHiwzmskX4_yeD4nE8or3RfabwcCLM,24240
145
146
  tpu_inference/kernels/mla/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
146
147
  tpu_inference/kernels/mla/v1/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
147
- tpu_inference/kernels/mla/v1/kernel.py,sha256=jLV3KvNj1sxIuGymKva-qYxVTuO2vxRG12JFbZ6utBs,50796
148
+ tpu_inference/kernels/mla/v1/kernel.py,sha256=oovjb0x3qz08IL_KVjLLbNbcEcFXip55fqgIgfnl3RA,49758
148
149
  tpu_inference/kernels/quantized_matmul/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
149
150
  tpu_inference/kernels/quantized_matmul/kernel.py,sha256=-A9Kd2ApHWgPvCaUPfjM5JooLz_iCfWV1UT0taaZaAo,16264
150
151
  tpu_inference/kernels/quantized_matmul/tuned_block_sizes.py,sha256=3zhIm73JEE8qOty2_0v3AJlVz13k6qMB5wlXBDyC1EM,35130
@@ -167,7 +168,7 @@ tpu_inference/layers/common/attention_metadata.py,sha256=rmipY517sefHe4owxC5USkm
167
168
  tpu_inference/layers/common/binary_search.py,sha256=ZQi-z1wG6WTcfVQXeTGOZokX4K1DSf9kCzqfrhEU8lk,12320
168
169
  tpu_inference/layers/common/quant_methods.py,sha256=SCm9g7bE02XSMONmOCuT0vfHeTP6RzGQ57aTj919HgM,772
169
170
  tpu_inference/layers/common/quantization.py,sha256=cTuoCpU3qBdPvoy_6R6uwCyz9ojh6esvl9x3bQeMbs4,8710
170
- tpu_inference/layers/common/sharding.py,sha256=Oh0aFBVXo1eLnfukSSc7ppPo23Rwlf7VGNiMR7Yxtfo,25985
171
+ tpu_inference/layers/common/sharding.py,sha256=curCejZPj8ND4rxjWEbwRozkFYlK_HlpIyTywhDHcWU,26171
171
172
  tpu_inference/layers/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
172
173
  tpu_inference/layers/jax/base.py,sha256=UhT4ut_59ynUPdaZGpMPSCQkPTWXA9BxkaPy7lDhoLI,6350
173
174
  tpu_inference/layers/jax/constants.py,sha256=YQJOeAbja1yTbPhoOWMp24OF1RCMwPybK1NIwPrrYJ0,3329
@@ -179,28 +180,28 @@ tpu_inference/layers/jax/rope_interface.py,sha256=cPqVpKG5_SU7S7xcrMEaPBJLqi1nC4
179
180
  tpu_inference/layers/jax/transformer_block.py,sha256=HTI0fYPQd23UbnJSB_pL2K3un3q_i3guvJiNCUReVRs,4492
180
181
  tpu_inference/layers/jax/attention/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
181
182
  tpu_inference/layers/jax/attention/attention.py,sha256=_N5W4ox8EzC1CZYcIhsEi35X8WCIMFEBlSzVtDDcTu8,10623
182
- tpu_inference/layers/jax/attention/deepseek_v3_attention.py,sha256=DIT6FEEdAdOpH8Qpq0kt4sNCBKRi4xFJ8y6rq5Qp4ag,22370
183
- tpu_inference/layers/jax/attention/gpt_oss_attention.py,sha256=adzhOODAT-fFp6L6joGfHQEfwhm2PX-jv4lY9mCz85o,9233
184
- tpu_inference/layers/jax/attention/llama4_attention.py,sha256=Q36xTJe9IZqIGZca4vv4o7OpxzZS6XWXbW4UV-EmNW0,6737
183
+ tpu_inference/layers/jax/attention/deepseek_v3_attention.py,sha256=KP-hgck-wTzTcwDNB08DwNiqsE-6OD4tQ1jLVwWQvEw,22427
184
+ tpu_inference/layers/jax/attention/gpt_oss_attention.py,sha256=EM1kJpr77VHh95aSD5UnSJazB_anS_7PyaD8TixVMrY,9241
185
+ tpu_inference/layers/jax/attention/llama4_attention.py,sha256=QzBDoEioI9mMdI1T2LNlsr89iaGl234e-9s202YWS8M,6713
185
186
  tpu_inference/layers/jax/moe/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
186
187
  tpu_inference/layers/jax/moe/deepseek_v3_moe.py,sha256=5j6TJO8fAB2Yv6mVAeM2F9WLe4QDM9bf6zxtdKjHjCQ,26456
187
188
  tpu_inference/layers/jax/moe/gpt_oss_moe.py,sha256=-uliFqHJFOTT9WJCEpGhkImOXMSoo3aePXMOmKXlgmk,6771
188
189
  tpu_inference/layers/jax/moe/moe.py,sha256=E7L8bJucTVke89o048GAbWdtuQIL5oDz-MkW0NK4E00,10114
189
190
  tpu_inference/layers/jax/sample/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
190
191
  tpu_inference/layers/jax/sample/rejection_sampler.py,sha256=VqN0mxi7Xg58w4EXS625ndC8NyA_UZMV9bjFM1mkvrY,21000
191
- tpu_inference/layers/jax/sample/sampling.py,sha256=kAH3S9LZTPCegkFgJ1XSI5rO64v6jAcc7jr3G5mnY3o,3889
192
+ tpu_inference/layers/jax/sample/sampling.py,sha256=IfJBFSXuTdd0QELn8Opmh7HgdzKreIwGYUOskTFp4aI,3888
192
193
  tpu_inference/layers/jax/sample/sampling_metadata.py,sha256=bip7TQcw-VHyN6072zBQY-tA0-QTyJpnuYg04mw9Sv0,3136
193
194
  tpu_inference/layers/vllm/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
194
195
  tpu_inference/layers/vllm/attention.py,sha256=LMQbS2KAup0Q-mmN5pzV6uUs-qdGpTSH8eV6ByHde9g,7370
195
- tpu_inference/layers/vllm/fused_moe.py,sha256=CYHksNA3FYWsNNS5d5dIdkaq6s41EMAB1sIPRSxY7RM,18747
196
+ tpu_inference/layers/vllm/fused_moe.py,sha256=NdMVpDLI5-5274EuhVtH8KZzCnLBqSZSSvRoZqzwY7s,19868
196
197
  tpu_inference/layers/vllm/linear_common.py,sha256=--jpy8vf0RkZ0jDU1QfXT-V-RnjIloNSodYQKiw4Txo,9129
197
- tpu_inference/layers/vllm/sharding.py,sha256=f8iZHQOO4cxl8GO_2_4SKaky9QKXnz6OAtiT40F1Sgo,9744
198
+ tpu_inference/layers/vllm/sharding.py,sha256=f3pu7CJNRkfq5j1bmhmTM5wU9HwAePH3yWeTmaIINAw,9926
198
199
  tpu_inference/layers/vllm/quantization/__init__.py,sha256=XYe1VwgoFqLTuLJ-i-64hzNNMSWOkoErLTA_4N_Cze0,2463
199
200
  tpu_inference/layers/vllm/quantization/awq.py,sha256=nlWwR08lPlE_HIXLoDiGS2pOAJEiI0ukUGXos0NpbAE,9072
200
- tpu_inference/layers/vllm/quantization/common.py,sha256=139fpfCNJCBBMUSbmAXs7UjrZA68mfTN7uOGvtUd1yk,4899
201
+ tpu_inference/layers/vllm/quantization/common.py,sha256=GXYEvnhvRF8VWX0SHkzRpV3_LoQgAwCmXwLbEEwSm8A,5021
201
202
  tpu_inference/layers/vllm/quantization/fp8.py,sha256=_NT7QOD-N3UAJnYSDJD24Tsp8FaSK6NuDYp78QOTyzo,4530
202
- tpu_inference/layers/vllm/quantization/mxfp4.py,sha256=JIaMQ8mco7l-rvEgnPAb_HbKJ4sxMqihpIWmodZ_yT0,17581
203
- tpu_inference/layers/vllm/quantization/unquantized.py,sha256=RVjjRsn6kcKu5RfRySrOHmCV-W8jjIwX2iYit3qygy8,16149
203
+ tpu_inference/layers/vllm/quantization/mxfp4.py,sha256=oS7e40ovqmipDKiHfpezzdP2RaFSNeXw6zv2nTrwKvc,18214
204
+ tpu_inference/layers/vllm/quantization/unquantized.py,sha256=LPGRKw3lkeCHCJsY70P_hDUagnmI5bNe1cHuTFUEBkc,16701
204
205
  tpu_inference/layers/vllm/quantization/compressed_tensors/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
205
206
  tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py,sha256=2RS8owCqKHXZbtWKNjdKtsfzKH9N60UyqD-ug1A83oE,5914
206
207
  tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=aOme0LFnhVeXDxdSw-Z0k5MZutM3-EoF2vwffezCARE,11277
@@ -212,49 +213,49 @@ tpu_inference/lora/torch_lora_ops.py,sha256=YR3Hj8nLLiQ-6wXy4uFsjQxFTbJYZ4o5dh_L
212
213
  tpu_inference/lora/torch_punica_tpu.py,sha256=qTnXZGLoOgvukSxeunO_SfpPTlkq9GlMj9H7zVYg9LE,12680
213
214
  tpu_inference/models/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
214
215
  tpu_inference/models/common/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
215
- tpu_inference/models/common/model_loader.py,sha256=dzir8-QpqJN680XbBAl3E8eUXeYeniNRwYvZCFvWfBc,21878
216
+ tpu_inference/models/common/model_loader.py,sha256=TnTTION_J3nMGsIMJFuMzoBSl3VHHvhretmF5gMkyXI,21679
216
217
  tpu_inference/models/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
217
218
  tpu_inference/models/jax/deepseek_v3.py,sha256=mje3RgxE1NwKWVLgJnPq3ebWB1J8T6YGHT2TtxN10Dg,45031
218
219
  tpu_inference/models/jax/gpt_oss.py,sha256=bgdsCx3UcTqEJatWBYbma5HNHH8GEaHN4aL5IsAeSmM,21592
219
220
  tpu_inference/models/jax/jax_intermediate_tensor.py,sha256=XKpDgPkOiRtYaPrW76ILxcp2uFfSiE1JMdqHWGo0-Ss,3179
220
- tpu_inference/models/jax/llama3.py,sha256=WJDldaWgVmeJNg73Nj7A_sD1rmaH6GsgedPOSULxzFo,16093
221
+ tpu_inference/models/jax/llama3.py,sha256=FjTGC69V_EJmvb5BIqYu3V5NS1Pvy-5Pb34kMn5YU5U,16317
221
222
  tpu_inference/models/jax/llama4.py,sha256=Ssycb5fcGjhJYg8FfcNckVhow7bvVt0FJbbpHinzMAA,30206
222
223
  tpu_inference/models/jax/llama_eagle3.py,sha256=_wnljvb8lLCQ0Z3Vuw0QI7F6b41x6I1WuvstZWGvCYE,13051
223
- tpu_inference/models/jax/llama_guard_4.py,sha256=3jZP3Pkp-iDpS_9M9x6UNorGaWF9bssT9fi3Jmn4ja0,15850
224
- tpu_inference/models/jax/qwen2.py,sha256=NkFZTNtJjHjRhasDLtEtvNmH9i_AiM7OBDysLbBhXPk,13985
225
- tpu_inference/models/jax/qwen2_5_vl.py,sha256=XqvjmUpNmmDT0uRmwTq_nJNoBoLAbPRcbwgllNYkYLc,50408
224
+ tpu_inference/models/jax/llama_guard_4.py,sha256=R4wo45s1JsVD39t8JeAItujGoi-sl43HBH95hr7qEVw,15845
225
+ tpu_inference/models/jax/qwen2.py,sha256=bart2yYGv0J-lNbk8Hk5jn5IF6j_Jp8YKSEjwVU_y24,14038
226
+ tpu_inference/models/jax/qwen2_5_vl.py,sha256=3g3tUt7c83fKOdiMzuq2VyldCyeXoCBGrVYfqyIWwGE,50370
226
227
  tpu_inference/models/jax/qwen3.py,sha256=jVOOVrBFnxRIZ_Euo90iCga8rORpz0Kqs79uKqsFwEQ,11678
227
228
  tpu_inference/models/jax/utils/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
228
229
  tpu_inference/models/jax/utils/file_utils.py,sha256=8iZcGNvF1N0gNioH8fBlVYTSGYn4fC2WvmlTyeDZyZM,3415
229
- tpu_inference/models/jax/utils/multi_modal_utils.py,sha256=CptSd8wk0KoXODCjMHjl62bp_41b2DNu2cxcCy4TAEg,6731
230
+ tpu_inference/models/jax/utils/multi_modal_utils.py,sha256=c2LRXdOPi3F779yg2UX-DnuFDxF1JciTcFa09iODxZs,6695
230
231
  tpu_inference/models/jax/utils/weight_utils.py,sha256=0xyjGlDSrA09gtb4plw9yX57VPMgn3o5WNl6mXPDU70,23121
231
232
  tpu_inference/models/jax/utils/qwix/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
232
- tpu_inference/models/jax/utils/qwix/qwix_utils.py,sha256=JOl3j4YO0P90ue0vsy-ZzNVGluh-VslAMOI-9wb1Igw,29288
233
+ tpu_inference/models/jax/utils/qwix/qwix_utils.py,sha256=w3wmDb1drJxOK1mVRVMORznqKbtZqFfi7H0Ib_k-iW8,29526
233
234
  tpu_inference/models/vllm/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
234
- tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=UkTyx4IzHUrdnyv0uBpqG6DR0MaPCyRzTNnE_COjhlI,12896
235
+ tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=G4ClHbvMY0gPpTOFWStb1mEVVMzIc3-wz1KXC-mDpj8,13023
235
236
  tpu_inference/models/vllm/vllm_model_wrapper_context.py,sha256=vsXQnC2aZ_mHKb-7d9UeN28lfawfApNTm5asUMgEhgo,1762
236
237
  tpu_inference/platforms/__init__.py,sha256=BK6rwAhiqVSAUJ9m9EehSKetA6hEPe92flD9Ei076WQ,649
237
- tpu_inference/platforms/tpu_platform.py,sha256=00BCnL-xO611RfxxTFd2YMu9BvEvdKcGnQ_QTRIKEoM,9789
238
+ tpu_inference/platforms/tpu_platform.py,sha256=bGTH1k0GI5AB_He5IghJtPwuyrhceUQ-rHs41fMpwhI,9826
238
239
  tpu_inference/runner/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
239
240
  tpu_inference/runner/block_table.py,sha256=K3Ic8EgPM08d_C5nEN60mxoRydlaQWySAemf_8Q_qVw,4175
240
- tpu_inference/runner/compilation_manager.py,sha256=TSinw_COpMbLLOdH78e0H3S7F3vStYSRDB6aj_KEi9w,41456
241
+ tpu_inference/runner/compilation_manager.py,sha256=BFjOzJUyEJTmUZAvGCm3yeqoY7Kkw2JKc_A3CzRoN7o,42112
241
242
  tpu_inference/runner/input_batch.py,sha256=bx221NX2IOWzrtopss-B-2ZKW4y-U6nQpG09PjpUziw,18273
242
243
  tpu_inference/runner/kv_cache.py,sha256=xpB6VTrT3lIq5JNNPJTVEnHFgehIzgxKNIHxxXIxwKI,6046
243
- tpu_inference/runner/kv_cache_manager.py,sha256=Bd5nMH-KupjeuDpn9pHdV4NzZ7inVa-bSrVGF3AYgRo,23417
244
+ tpu_inference/runner/kv_cache_manager.py,sha256=u6pXaWPzmPe34lXiy-acAdGBmp9WEQrGvksyBfGBRdM,23342
244
245
  tpu_inference/runner/lora_utils.py,sha256=DGV_8aMrqb6Q4v7eC0UvipsM-6XQSt1afiZGKTKd6sc,4418
245
- tpu_inference/runner/multimodal_manager.py,sha256=qkDiQ2_dL5y5gndq3VtctnAHQxBkIC1HGS8haPEKuBo,10353
246
+ tpu_inference/runner/multimodal_manager.py,sha256=dQm0sQ9nGHaWRS8rVPDBZP4P6jNFcJPufnAxv8DoWYs,10344
246
247
  tpu_inference/runner/persistent_batch_manager.py,sha256=aCeTyqCgBnQy_6hXjiNLtF81ekG0-YwlQiWeJhx-pdM,13838
247
248
  tpu_inference/runner/speculative_decoding_manager.py,sha256=-eSxTIGXbRWRZjHJfikb7kfqbtr_cj7Pca9zInWSn1w,10790
248
249
  tpu_inference/runner/structured_decoding_manager.py,sha256=sj1fPrit0qdhcQtDbue5kpxos7zL16_dZQ5YSXTDbzg,4148
249
- tpu_inference/runner/tpu_runner.py,sha256=gDKkPJtlNqitoKQJb4EtdbUrV8Mzf1JJPRYHAnwGjnI,80544
250
+ tpu_inference/runner/tpu_runner.py,sha256=cgIyZiI3UjpvPWhNRL-mCSnssbbDNt00g5idAzwgWR0,80736
250
251
  tpu_inference/runner/utils.py,sha256=lKqL5nxGTk7ufzJRNdp4udn2bPu3jIX52W7akXgSrHc,17133
251
252
  tpu_inference/spec_decode/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
252
253
  tpu_inference/spec_decode/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
253
254
  tpu_inference/spec_decode/jax/eagle3.py,sha256=5WtEbkgzXpmFz374ibQD5IIcRro4d0SNeCYgBv2nM1c,19678
254
255
  tpu_inference/worker/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
255
- tpu_inference/worker/tpu_worker.py,sha256=ZVfXnSGWqY-dbRpN1lXm8n6CDHUkvIHXjsR4HT74ICU,21456
256
- tpu_inference-0.12.0.dev20251222.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
257
- tpu_inference-0.12.0.dev20251222.dist-info/METADATA,sha256=X5KHuShBIJ_5jo8GlzI1ji9YHKzdXbvpsk3qQczY5ac,5767
258
- tpu_inference-0.12.0.dev20251222.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
259
- tpu_inference-0.12.0.dev20251222.dist-info/top_level.txt,sha256=gb1hRIQ3DOawUfVzvPL2E__2KPIl9I0vb5r0xcRBGYQ,20
260
- tpu_inference-0.12.0.dev20251222.dist-info/RECORD,,
256
+ tpu_inference/worker/tpu_worker.py,sha256=ntwCibPyiw-z8aMUdtu8usqU_q2b0u7diWNOmpjG_6o,21651
257
+ tpu_inference-0.12.0.dev20251224.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
258
+ tpu_inference-0.12.0.dev20251224.dist-info/METADATA,sha256=gVLZ-35W1Nw3z2LnxeFYsNQHMRtTM7aUIAuWbxucsBg,5767
259
+ tpu_inference-0.12.0.dev20251224.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
260
+ tpu_inference-0.12.0.dev20251224.dist-info/top_level.txt,sha256=gb1hRIQ3DOawUfVzvPL2E__2KPIl9I0vb5r0xcRBGYQ,20
261
+ tpu_inference-0.12.0.dev20251224.dist-info/RECORD,,