tpu-inference 0.13.0rc2.post7__py3-none-any.whl → 0.13.2rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tpu-inference might be problematic. Click here for more details.

@@ -832,7 +832,7 @@ class TestGetDefaultQwixQuantizationConfig(unittest.TestCase):
832
832
  # Patch the constants in the module where the function resides
833
833
  self.patchers = [
834
834
  patch(
835
- "tpu_inference.models.jax.utils.qwix.qwix_utils.DEFAULT_DEEPSEEK_FP8_CONFIG",
835
+ "tpu_inference.models.jax.utils.qwix.qwix_utils.DEFAULT_DEEPSEEK_FP4_MLP_MOE_FP8_ATTN_CONFIG",
836
836
  self.mock_deepseek_config),
837
837
  patch(
838
838
  "tpu_inference.models.jax.utils.qwix.qwix_utils.DEFAULT_LLAMA4_FP8_CONFIG",
@@ -35,7 +35,7 @@ DEFAULT_NUM_TOKENS_FOR_MODEL_INPUTS = 512
35
35
  DEFAULT_MAX_NUM_SEQS_FOR_MODEL_INPUTS = 256
36
36
  DEFAULT_MAX_NUM_BLOCKS_PER_REQ = 16
37
37
 
38
- DEFAULT_DEEPSEEK_FP8_CONFIG = {
38
+ DEFAULT_DEEPSEEK_FP4_MLP_MOE_FP8_ATTN_CONFIG = {
39
39
  "qwix": {
40
40
  "use_abstract_model":
41
41
  True,
@@ -452,7 +452,7 @@ def get_default_qwix_quantization_config(
452
452
  # NOTE (jacobplatin): we'll default to mixed FP8 (attention) + FP4 (MoE experts)
453
453
  # for DeepSeek
454
454
  if model_type == "deepseek_v3" and quant_method == "fp8":
455
- config = copy.deepcopy(DEFAULT_DEEPSEEK_FP8_CONFIG)
455
+ config = copy.deepcopy(DEFAULT_DEEPSEEK_FP4_MLP_MOE_FP8_ATTN_CONFIG)
456
456
 
457
457
  # Dynamically fetch block size from HF config if available
458
458
  # Config fmt: 'weight_block_size': [1, 512] -> we want the 2nd dim for tile_size
@@ -462,7 +462,7 @@ def get_default_qwix_quantization_config(
462
462
  block_size = hf_quant_config["weight_block_size"]
463
463
  if isinstance(block_size, (list, tuple)) and len(block_size) == 2:
464
464
  assert block_size[
465
- 0] == 1, f"Expected first dimension to be 1 (unchanneled), but got {block_size[0]}!"
465
+ 0] == 1, f"Expected first dimension to be 1 (unchanneled), but got {block_size[0]}! If you are trying to run quantized DeepSeek, we currently only support 1D-subchannel quantization and those models can be found here: https://huggingface.co/collections/jrplatin/deepseek-r1-1d-subchannel"
466
466
  tile_size = block_size[1]
467
467
  assert tile_size > 1, f"Expected tile_size > 1 for DeepSeek, but got {tile_size}"
468
468
  logger.info(
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import functools
16
- from typing import TYPE_CHECKING, Dict, List
16
+ from typing import TYPE_CHECKING, List
17
17
 
18
18
  import jax
19
19
  import jax.numpy as jnp
@@ -212,7 +212,6 @@ class KVCacheManager:
212
212
  # uniform page size.
213
213
  representative_spec = kv_cache_config.kv_cache_groups[0].kv_cache_spec
214
214
  page_size_bytes = representative_spec.page_size_bytes
215
- self.runner.layer_name_to_kvcache_index: Dict[str, int] = {}
216
215
  kv_caches = self.runner.kv_caches
217
216
  num_blocks_list = []
218
217
  for i, kv_cache_tensor in enumerate(kv_cache_config.kv_cache_tensors):
@@ -282,6 +282,9 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
282
282
  self._substitute_placeholder_token_fn = _substitute_placeholder_token
283
283
  self.execute_model_state: ExecuteModelState | None = None
284
284
 
285
+ self.kv_caches: list[jax.Array] = []
286
+ self.layer_name_to_kvcache_index: dict[str, int] = {}
287
+
285
288
  def _init_random(self):
286
289
  if self.model_config.seed is None:
287
290
  self.model_config.seed = 0
@@ -545,7 +548,6 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
545
548
  self.topology_order_id = topology_order_id
546
549
  self.kv_cache_config = kv_cache_config
547
550
  self.use_hybrid_kvcache = len(kv_cache_config.kv_cache_groups) > 1
548
- self.kv_caches = []
549
551
  self.kv_cache_manager.initialize_kv_cache(kv_cache_config)
550
552
  if has_kv_transfer_group():
551
553
  get_kv_transfer_group().register_runner(self)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tpu_inference
3
- Version: 0.13.0rc2.post7
3
+ Version: 0.13.2rc3
4
4
  Author: tpu_inference Contributors
5
5
  Classifier: Development Status :: 3 - Alpha
6
6
  Classifier: Intended Audience :: Developers
@@ -32,10 +32,6 @@ Requires-Dist: pathwaysutils
32
32
  Requires-Dist: parameterized
33
33
  Requires-Dist: numba==0.62.1
34
34
  Requires-Dist: runai-model-streamer[gcs,s3]==0.15.0
35
- Requires-Dist: jax==0.8.1
36
- Requires-Dist: jaxlib==0.8.1
37
- Requires-Dist: jaxtyping==0.3.2
38
- Requires-Dist: libtpu==0.0.31
39
35
  Dynamic: author
40
36
  Dynamic: classifier
41
37
  Dynamic: description
@@ -45,7 +45,7 @@ tests/layers/common/test_attention_interface.py,sha256=ke6h-e8CP-FhNY_ojKCYwyHgY
45
45
  tests/layers/common/test_quantization.py,sha256=JcwDrNTm6UlBSV3s3mwwvpxOjqBpZDJwnYYoj3DnS7A,5344
46
46
  tests/layers/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
47
47
  tests/layers/jax/test_layers.py,sha256=L1xh_wniBtlfudya_WRmHUWOhEno0i6ikKE1XiBtaZs,5010
48
- tests/layers/jax/test_qwix.py,sha256=G7PrmkWkhQD8P0RvwnF-iyRoXO4d7g1Ce4ycaIjDQ_0,39727
48
+ tests/layers/jax/test_qwix.py,sha256=V8MpFKJb5_evs-Z4WeZ5SxA-KAyFD6Qrex7ExywLxmE,39744
49
49
  tests/layers/jax/test_rope.py,sha256=0biwYRSRsKMaRHknc8v8Tfrt0bmJKQGeQLPqR_D04mM,3565
50
50
  tests/layers/jax/test_sharding.py,sha256=Hk1MWhIluOKIBx7-O9fKa1n6fF3SW7UMYsRI9AGzp_0,5914
51
51
  tests/layers/jax/test_transformer_block.py,sha256=Wpgowc0ZJnv1GUxcK-Op6CCYWjpqgUM0p3EANk-YWzc,5742
@@ -230,7 +230,7 @@ tpu_inference/models/jax/utils/file_utils.py,sha256=8iZcGNvF1N0gNioH8fBlVYTSGYn4
230
230
  tpu_inference/models/jax/utils/multi_modal_utils.py,sha256=c2LRXdOPi3F779yg2UX-DnuFDxF1JciTcFa09iODxZs,6695
231
231
  tpu_inference/models/jax/utils/weight_utils.py,sha256=0xyjGlDSrA09gtb4plw9yX57VPMgn3o5WNl6mXPDU70,23121
232
232
  tpu_inference/models/jax/utils/qwix/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
233
- tpu_inference/models/jax/utils/qwix/qwix_utils.py,sha256=JOl3j4YO0P90ue0vsy-ZzNVGluh-VslAMOI-9wb1Igw,29288
233
+ tpu_inference/models/jax/utils/qwix/qwix_utils.py,sha256=w3wmDb1drJxOK1mVRVMORznqKbtZqFfi7H0Ib_k-iW8,29526
234
234
  tpu_inference/models/vllm/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
235
235
  tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=G4ClHbvMY0gPpTOFWStb1mEVVMzIc3-wz1KXC-mDpj8,13023
236
236
  tpu_inference/models/vllm/vllm_model_wrapper_context.py,sha256=vsXQnC2aZ_mHKb-7d9UeN28lfawfApNTm5asUMgEhgo,1762
@@ -241,21 +241,21 @@ tpu_inference/runner/block_table.py,sha256=K3Ic8EgPM08d_C5nEN60mxoRydlaQWySAemf_
241
241
  tpu_inference/runner/compilation_manager.py,sha256=BFjOzJUyEJTmUZAvGCm3yeqoY7Kkw2JKc_A3CzRoN7o,42112
242
242
  tpu_inference/runner/input_batch.py,sha256=bx221NX2IOWzrtopss-B-2ZKW4y-U6nQpG09PjpUziw,18273
243
243
  tpu_inference/runner/kv_cache.py,sha256=xpB6VTrT3lIq5JNNPJTVEnHFgehIzgxKNIHxxXIxwKI,6046
244
- tpu_inference/runner/kv_cache_manager.py,sha256=Bd5nMH-KupjeuDpn9pHdV4NzZ7inVa-bSrVGF3AYgRo,23417
244
+ tpu_inference/runner/kv_cache_manager.py,sha256=u6pXaWPzmPe34lXiy-acAdGBmp9WEQrGvksyBfGBRdM,23342
245
245
  tpu_inference/runner/lora_utils.py,sha256=DGV_8aMrqb6Q4v7eC0UvipsM-6XQSt1afiZGKTKd6sc,4418
246
246
  tpu_inference/runner/multimodal_manager.py,sha256=dQm0sQ9nGHaWRS8rVPDBZP4P6jNFcJPufnAxv8DoWYs,10344
247
247
  tpu_inference/runner/persistent_batch_manager.py,sha256=aCeTyqCgBnQy_6hXjiNLtF81ekG0-YwlQiWeJhx-pdM,13838
248
248
  tpu_inference/runner/speculative_decoding_manager.py,sha256=-eSxTIGXbRWRZjHJfikb7kfqbtr_cj7Pca9zInWSn1w,10790
249
249
  tpu_inference/runner/structured_decoding_manager.py,sha256=sj1fPrit0qdhcQtDbue5kpxos7zL16_dZQ5YSXTDbzg,4148
250
- tpu_inference/runner/tpu_runner.py,sha256=djG7LKp69B_tCfsbHB7DrYncxRlB8tUnzNCyL8GYs9s,80656
250
+ tpu_inference/runner/tpu_runner.py,sha256=cgIyZiI3UjpvPWhNRL-mCSnssbbDNt00g5idAzwgWR0,80736
251
251
  tpu_inference/runner/utils.py,sha256=lKqL5nxGTk7ufzJRNdp4udn2bPu3jIX52W7akXgSrHc,17133
252
252
  tpu_inference/spec_decode/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
253
253
  tpu_inference/spec_decode/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
254
254
  tpu_inference/spec_decode/jax/eagle3.py,sha256=5WtEbkgzXpmFz374ibQD5IIcRro4d0SNeCYgBv2nM1c,19678
255
255
  tpu_inference/worker/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
256
256
  tpu_inference/worker/tpu_worker.py,sha256=ntwCibPyiw-z8aMUdtu8usqU_q2b0u7diWNOmpjG_6o,21651
257
- tpu_inference-0.13.0rc2.post7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
258
- tpu_inference-0.13.0rc2.post7.dist-info/METADATA,sha256=1ZAhQLtZULopCk6yhmIPAlNwXWf3Yz4qbtwT_nRkN3Q,5881
259
- tpu_inference-0.13.0rc2.post7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
260
- tpu_inference-0.13.0rc2.post7.dist-info/top_level.txt,sha256=gb1hRIQ3DOawUfVzvPL2E__2KPIl9I0vb5r0xcRBGYQ,20
261
- tpu_inference-0.13.0rc2.post7.dist-info/RECORD,,
257
+ tpu_inference-0.13.2rc3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
258
+ tpu_inference-0.13.2rc3.dist-info/METADATA,sha256=M-zpy13-UvyJlBBO2T6TWb9aUhSJGOVD6vgth9NUvdo,5758
259
+ tpu_inference-0.13.2rc3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
260
+ tpu_inference-0.13.2rc3.dist-info/top_level.txt,sha256=gb1hRIQ3DOawUfVzvPL2E__2KPIl9I0vb5r0xcRBGYQ,20
261
+ tpu_inference-0.13.2rc3.dist-info/RECORD,,