tpu-inference 0.11.1.dev202511220812__py3-none-any.whl → 0.12.0.dev20251213__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tpu-inference might be problematic. Click here for more details.

Files changed (59) hide show
  1. tests/kernels/fused_moe_v1_test.py +303 -34
  2. tests/kernels/mla_v1_test.py +129 -41
  3. tests/kernels/quantized_matmul_kernel_test.py +2 -34
  4. tests/kernels/ragged_paged_attention_kernel_v3_hd64_test.py +3 -1
  5. tests/kernels/ragged_paged_attention_kernel_v3_test.py +3 -1
  6. tests/lora/test_layers.py +4 -1
  7. tests/lora/test_lora_perf.py +53 -0
  8. tests/test_envs.py +110 -12
  9. tests/test_quantization.py +3 -0
  10. tests/test_utils.py +1 -2
  11. tpu_inference/distributed/tpu_connector.py +1 -1
  12. tpu_inference/envs.py +92 -8
  13. tpu_inference/executors/ray_distributed_executor.py +5 -1
  14. tpu_inference/kernels/collectives/all_gather_matmul.py +12 -6
  15. tpu_inference/kernels/collectives/all_gather_matmul_tuned_block_sizes.py +7 -2
  16. tpu_inference/kernels/fused_moe/v1/kernel.py +712 -143
  17. tpu_inference/kernels/mla/v1/kernel.py +98 -120
  18. tpu_inference/kernels/quantized_matmul/kernel.py +69 -8
  19. tpu_inference/kernels/ragged_paged_attention/v2/kernel.py +2 -1
  20. tpu_inference/kernels/ragged_paged_attention/v2/ragged_kv_cache_update.py +2 -1
  21. tpu_inference/kernels/ragged_paged_attention/v3/kernel.py +82 -32
  22. tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py +146 -85
  23. tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes_hd64.py +2 -1
  24. tpu_inference/kernels/ragged_paged_attention/v3/util.py +2 -1
  25. tpu_inference/layers/common/attention_interface.py +7 -1
  26. tpu_inference/layers/common/sharding.py +11 -7
  27. tpu_inference/layers/jax/attention/deepseek_v3_attention.py +232 -64
  28. tpu_inference/layers/jax/attention/gpt_oss_attention.py +5 -5
  29. tpu_inference/layers/vllm/fused_moe.py +170 -208
  30. tpu_inference/layers/vllm/linear_common.py +43 -21
  31. tpu_inference/layers/vllm/quantization/common.py +11 -6
  32. tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py +4 -3
  33. tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors_moe.py +74 -65
  34. tpu_inference/layers/vllm/quantization/mxfp4.py +140 -94
  35. tpu_inference/layers/vllm/quantization/unquantized.py +103 -80
  36. tpu_inference/models/common/model_loader.py +78 -22
  37. tpu_inference/models/jax/deepseek_v3.py +185 -64
  38. tpu_inference/models/jax/gpt_oss.py +3 -3
  39. tpu_inference/models/jax/llama_eagle3.py +4 -5
  40. tpu_inference/models/jax/qwen2_5_vl.py +161 -47
  41. tpu_inference/models/jax/utils/quantization/quantization_utils.py +7 -8
  42. tpu_inference/models/jax/utils/weight_utils.py +203 -155
  43. tpu_inference/models/vllm/vllm_model_wrapper.py +11 -5
  44. tpu_inference/platforms/tpu_platform.py +29 -48
  45. tpu_inference/runner/compilation_manager.py +112 -46
  46. tpu_inference/runner/kv_cache.py +40 -20
  47. tpu_inference/runner/kv_cache_manager.py +40 -31
  48. tpu_inference/runner/persistent_batch_manager.py +40 -2
  49. tpu_inference/runner/structured_decoding_manager.py +2 -3
  50. tpu_inference/runner/tpu_runner.py +94 -51
  51. tpu_inference/runner/utils.py +2 -2
  52. tpu_inference/spec_decode/jax/eagle3.py +71 -22
  53. tpu_inference/utils.py +41 -14
  54. tpu_inference/worker/tpu_worker.py +43 -45
  55. {tpu_inference-0.11.1.dev202511220812.dist-info → tpu_inference-0.12.0.dev20251213.dist-info}/METADATA +8 -9
  56. {tpu_inference-0.11.1.dev202511220812.dist-info → tpu_inference-0.12.0.dev20251213.dist-info}/RECORD +59 -58
  57. {tpu_inference-0.11.1.dev202511220812.dist-info → tpu_inference-0.12.0.dev20251213.dist-info}/WHEEL +0 -0
  58. {tpu_inference-0.11.1.dev202511220812.dist-info → tpu_inference-0.12.0.dev20251213.dist-info}/licenses/LICENSE +0 -0
  59. {tpu_inference-0.11.1.dev202511220812.dist-info → tpu_inference-0.12.0.dev20251213.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,9 @@
1
1
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  tests/test_base.py,sha256=Ct5WFRMHL7IHEIxk8FrzAvO8m0xFuDpzDBKkAKKAL2Q,7341
3
- tests/test_envs.py,sha256=Woyfp_d5HS-uTGo4_u9dYlBbgmhfIEoFb-Rx_k7YXD4,6298
4
- tests/test_quantization.py,sha256=IT5ASyS1uuWcxc22kRtBcA-V4j3Z3hb7pMztm3GOlBs,34445
3
+ tests/test_envs.py,sha256=v0_R-HfWRNY8ssPqFrytHMl1irohJaTpS_rSKo2FZaY,10021
4
+ tests/test_quantization.py,sha256=VaxrxS-05PeHEit1bVHZSGnGO2lk2TTjyWh4pxAZwOw,34615
5
5
  tests/test_tpu_info.py,sha256=ZrwlMsp8ffITkS_b8Q1t_QG-a-WVAd4NUcjHhGibcsI,4670
6
- tests/test_utils.py,sha256=Mta5ZzYCgRAh1-BjcOvvx9iQ9DnnXLps7oDHxVQp2yE,8236
6
+ tests/test_utils.py,sha256=GIXLdd-x4gnqSLrySXGk22phqPc8MegFd7ph1Jj8OcU,8182
7
7
  tests/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  tests/core/test_core_tpu.py,sha256=r496rk1eOsK_F4nvm9zprl_T-RcO6eCUb7LuVReOZno,21413
9
9
  tests/core/test_disagg_executor.py,sha256=QdE2YZs08EyDDCmSjhiXkXqQ9BJTgO6csr_E1xkkfSg,2256
@@ -11,25 +11,26 @@ tests/core/test_disagg_utils.py,sha256=alktTGppaGdg-_un0Amz8Y0IDQz-xNJN0dXG-YApE
11
11
  tests/core/test_dp_scheduler.py,sha256=IwCR1Vs96V4CQdWA051rNaYxxr2V_byA1yx9HWyRoMg,37339
12
12
  tests/core/test_init.py,sha256=NEFI5A9eKGu4rmeJ2iqd0EmhlA3bzbVkXmMi1PV1b9U,1687
13
13
  tests/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- tests/kernels/fused_moe_v1_test.py,sha256=c6zbSHQDzOseeyL9VCjQeP7zayNnwYf059CPlKcvZzQ,3137
15
- tests/kernels/mla_v1_test.py,sha256=oZc4TCgquiG0KOeWfv46yJbUIpro_CgCMFc7vzyB7t8,11646
16
- tests/kernels/quantized_matmul_kernel_test.py,sha256=od5-zXFjcsc_gWGRDrREL8E_ftymNniQVTzgtkBo_Gc,5679
14
+ tests/kernels/fused_moe_v1_test.py,sha256=sQ6gvpti94fpPYrSZn7frPPNjqbVmRibFtenVrGGCA4,10403
15
+ tests/kernels/mla_v1_test.py,sha256=FZnFVQomU39fsmZaZaLr51MXN5PeLw5cTOe0eIoLO8o,15501
16
+ tests/kernels/quantized_matmul_kernel_test.py,sha256=9Q3ufAG6NY9jeEFcre_IY2JbwpQdYzzhMWbXb5yfY6Q,4796
17
17
  tests/kernels/ragged_kv_cache_update_v2_test.py,sha256=6-HjP5CoUG-kcuP8MS-JJVMiBnPRo_zadS3VInnO0D4,10821
18
18
  tests/kernels/ragged_paged_attention_kernel_v2_test.py,sha256=pWqo9UYF0tzwgBKO_xYw-TYSPrtAsKcMK5Haj8hFG7I,11340
19
- tests/kernels/ragged_paged_attention_kernel_v3_hd64_test.py,sha256=JhIElqUZIRqIsfQ3U1RUzSiH_gz_SabAqDosGGZ2tlA,16321
20
- tests/kernels/ragged_paged_attention_kernel_v3_test.py,sha256=Hrd8iUkS1pS3rxeTyY53aYRg_ZL_d3NqgBXvOgnigSU,14838
19
+ tests/kernels/ragged_paged_attention_kernel_v3_hd64_test.py,sha256=1tLpdHK3jb1OYICZ7aPViUsR3NAW1D1pQaoz_fTrrx4,16432
20
+ tests/kernels/ragged_paged_attention_kernel_v3_test.py,sha256=tEMWsQ6M5zO8aOa-JW6XFQJHTyep4aS7pwXe33oLPDs,14949
21
21
  tests/lora/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  tests/lora/conftest.py,sha256=EXjwE1CjmUUlMEXpyE3UwxvgrKUllE73I8BNKfP1FTc,984
23
23
  tests/lora/test_bgmv.py,sha256=gQxWsJdNX2nkrE2xyrG0exwf3E2eHm2k2nkEXoANuQc,1359
24
- tests/lora/test_layers.py,sha256=6B4HhMAItQmt0hPAQgyXgwSYs7b3bIbUf6LaPsqXLzY,25923
24
+ tests/lora/test_layers.py,sha256=xeA4vFD6pm1DY8Quov-qryjcS3X7ksux9C9U4ectBaQ,26084
25
25
  tests/lora/test_lora.py,sha256=wJiF1P1BDnPN8TLX2tlFtdZ_QCkV-S9nPl6_uR6DqFc,4439
26
+ tests/lora/test_lora_perf.py,sha256=f_uTnJbgHdcyMnVWImgloGaQNRdsMsGhBSkb_G3oJx8,1806
26
27
  tests/lora/utils.py,sha256=rY0tDZEZe58ye4-ykwrTnsiWuLcaEG57N_Rua90bDXI,2726
27
28
  tpu_inference/__init__.py,sha256=p4MaepRdN7723FUNE-3pOMxZWjFn4_TVFgjrNyty4JE,2304
28
29
  tpu_inference/env_override.py,sha256=pmL7lfs_rGCP92ya3wuWuudsCYeOMZ6tFZY82A4KkQc,365
29
- tpu_inference/envs.py,sha256=hoPuT0SyLCxqyZ0QJIha6EXSZv2TpACfmENuiT0iJMM,3956
30
+ tpu_inference/envs.py,sha256=A1Bdm5qiXhTdu-Q_yNzBpi79_nOJIDbdFF7MAMqmjxo,6662
30
31
  tpu_inference/logger.py,sha256=HQCz7NefmbturuhOC7-3Ixbtcdgoz4g9FHh2RB6o8cc,334
31
32
  tpu_inference/tpu_info.py,sha256=3iilHRQSFjwMJwhKcuuawTm7mhwkgHbj4zi6CiAySrs,2265
32
- tpu_inference/utils.py,sha256=Ddsx2CY2ARe46RZL27URzXCN3P6pMcKWB-APXUB8sHs,10098
33
+ tpu_inference/utils.py,sha256=Gx9AKphXvY5ltfXL5DNKEH-I7LN6V4ZIv7cqTgxMtaI,11088
33
34
  tpu_inference/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
35
  tpu_inference/core/core_tpu.py,sha256=WDD3koE_j1QhWS2BbMA2aQOZayPZm4tYPvzL4YCX2jY,33294
35
36
  tpu_inference/core/disagg_executor.py,sha256=HZpgYMVxRxm0RQxO4l8IDYBWJ6Z3Tac6xavc5otcirc,4657
@@ -38,47 +39,47 @@ tpu_inference/core/sched/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
38
39
  tpu_inference/core/sched/dp_scheduler.py,sha256=mKs8Ms46szdlBfo8hjdqis2ZKAZbcKnHAGfEr0X5R8g,22527
39
40
  tpu_inference/distributed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
41
  tpu_inference/distributed/jax_parallel_state.py,sha256=5_xCwcL03lFPUoSO_OP7hIVKpUFroW1m-jVO7R6FbUc,2223
41
- tpu_inference/distributed/tpu_connector.py,sha256=w_gOI6hX7NWefaxN_9XH9TXReGElOyFifdDHpPswotM,29696
42
+ tpu_inference/distributed/tpu_connector.py,sha256=kLaTwy6BrAThJeFkd1soJ47bBo5iGp4GjUJs7xFx4Tg,29696
42
43
  tpu_inference/distributed/utils.py,sha256=1KIREn28Zg10O-MSUkVQMRzS09WoGc_VLGOX4QTFJac,1504
43
44
  tpu_inference/executors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- tpu_inference/executors/ray_distributed_executor.py,sha256=emYfSFJ3kluEmi6mlfnvxSUrC_mGVRVcjrUqUH2MR4g,16122
45
+ tpu_inference/executors/ray_distributed_executor.py,sha256=9CnzWb8aurH1B0tJfMHB73F-RQBGqSf5DnymetBvZ5o,16225
45
46
  tpu_inference/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
47
  tpu_inference/experimental/llama3_jax_stashed.py,sha256=YK1oSIfto9ALo-HB45XfSrbq9XgVbE4m2C-9zRwmSzI,10913
47
48
  tpu_inference/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
49
  tpu_inference/kernels/collectives/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
- tpu_inference/kernels/collectives/all_gather_matmul.py,sha256=0OYLLjlDmkRYScl7lHRi0o___5I5iMiW1gso-_dWSbc,27255
50
- tpu_inference/kernels/collectives/all_gather_matmul_tuned_block_sizes.py,sha256=KdaOIzTfIgUR0CcUTA46tpYH-cxPNoJx2cTMEvHx-Ac,1399
50
+ tpu_inference/kernels/collectives/all_gather_matmul.py,sha256=TtQWY0lNj8699JwDmjqbRrdku-3oAw5WkuuoFPS49AY,27597
51
+ tpu_inference/kernels/collectives/all_gather_matmul_tuned_block_sizes.py,sha256=OEPf4q08IeIFyJfzizgRs6kSD7w35NeZDRIn7CcZ344,1468
51
52
  tpu_inference/kernels/collectives/util.py,sha256=LbLD6lOxuszbUsykF89gWQqEJUICCZsfzam3EJDPnFE,1859
52
53
  tpu_inference/kernels/flash_attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
54
  tpu_inference/kernels/flash_attention/kernel.py,sha256=n8gmAFVfchMXlyaSEj8xXJm6AadFt26edQihPRdithY,25897
54
55
  tpu_inference/kernels/fused_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
56
  tpu_inference/kernels/fused_moe/v1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
- tpu_inference/kernels/fused_moe/v1/kernel.py,sha256=QHB0QEvC3x_6zhwz06JQpaOncQcNAhOSV92dD5tGVq8,40869
57
+ tpu_inference/kernels/fused_moe/v1/kernel.py,sha256=p1gfFKK-Goa0RVFusVup_F9fVKdZslYQp60S93v43Q0,65179
57
58
  tpu_inference/kernels/mla/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
59
  tpu_inference/kernels/mla/v1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
- tpu_inference/kernels/mla/v1/kernel.py,sha256=dw1nhpL47uQxMFOIN2kENC6aITbalT81YZLAyr1usLU,51571
60
+ tpu_inference/kernels/mla/v1/kernel.py,sha256=5VBo2-C2-hRWSS1NVlDLneZixtnFiJQX6vE5cDBf5Xs,50222
60
61
  tpu_inference/kernels/quantized_matmul/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
- tpu_inference/kernels/quantized_matmul/kernel.py,sha256=4oEVUXgWOeOY-PfySHf-iEuUSd9J7GQk_rDSbxa7CXg,14086
62
+ tpu_inference/kernels/quantized_matmul/kernel.py,sha256=-A9Kd2ApHWgPvCaUPfjM5JooLz_iCfWV1UT0taaZaAo,16264
62
63
  tpu_inference/kernels/quantized_matmul/tuned_block_sizes.py,sha256=3zhIm73JEE8qOty2_0v3AJlVz13k6qMB5wlXBDyC1EM,35130
63
64
  tpu_inference/kernels/quantized_matmul/util.py,sha256=rf6nIiAj9I2cj4LDvtaZGhcLXEc94o2xgMWasnFaREM,1943
64
65
  tpu_inference/kernels/ragged_paged_attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
65
66
  tpu_inference/kernels/ragged_paged_attention/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
- tpu_inference/kernels/ragged_paged_attention/v2/kernel.py,sha256=OiQGAHhyggbp1PeuasPymopFohKOJjGXcpq9p_S8UWA,34940
67
- tpu_inference/kernels/ragged_paged_attention/v2/ragged_kv_cache_update.py,sha256=vGp2ZWODTbjyG9z2z0Qf_BX-wYHd5bUybnc_DtOz0nI,10995
67
+ tpu_inference/kernels/ragged_paged_attention/v2/kernel.py,sha256=462jgsWdnaQfO9K1Y99cJ-qidYWXZMc5GdoY9enQEWY,35019
68
+ tpu_inference/kernels/ragged_paged_attention/v2/ragged_kv_cache_update.py,sha256=y9-C_F28WGd282Ra_DqwTbHyUIIj2jyWY3DiX8yozHY,11080
68
69
  tpu_inference/kernels/ragged_paged_attention/v2/tuned_block_sizes.py,sha256=mw80bXBGenroGdrITV0F_EaI2s-Z9KWwqU9WodvJg14,97919
69
70
  tpu_inference/kernels/ragged_paged_attention/v3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
- tpu_inference/kernels/ragged_paged_attention/v3/kernel.py,sha256=O179Fft5KpuN5LIFx3SghWXJJUqh3Og-xqfO4Z8QXYU,57032
71
- tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py,sha256=z0oaH8ZkDmHSoG4yiiO2CN0kuAuFcEpQ3RUoi5msjlo,56904
71
+ tpu_inference/kernels/ragged_paged_attention/v3/kernel.py,sha256=X4kz6C4Zujy7Lm5pQhMRHqvWHO6baXDcosbUEUgJ4us,59258
72
+ tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py,sha256=9M67U_lL2pYmR_TfNfJ_JexxlXoqUz9p_uXw4rlRvVo,59715
72
73
  tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes.py,sha256=k3LwduhZO85cJ-pSgnGN0c2Nn8eNeQq4eA94KUXJzMw,142198
73
- tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes_hd64.py,sha256=P3_ivi8iUz5QMU_3pgpl4Bkbmn0q0NpDtVJX39haRQA,11208
74
- tpu_inference/kernels/ragged_paged_attention/v3/util.py,sha256=1N_ozjKboDYLteFJndWoLXNudj2z53rGXMkELa5Z9tY,1102
74
+ tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes_hd64.py,sha256=O0XK9pPS3QyH3-7PG7m7FD9cMGV_vYV0f39evmLDDhI,11235
75
+ tpu_inference/kernels/ragged_paged_attention/v3/util.py,sha256=4ypt-NQwhfjCtPxFdZ1OhidZtiVRGPGbNMj1F9HbqMk,1181
75
76
  tpu_inference/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
77
  tpu_inference/layers/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
- tpu_inference/layers/common/attention_interface.py,sha256=CImMS8tuWgvaRY9YbGS3pY7OBnzeJ4Jla7LRFb4Xoa4,13224
78
+ tpu_inference/layers/common/attention_interface.py,sha256=V3YowJ_3PTwpRnN72o8Kph17szBB4x9fKC-XE8JpvUc,13368
78
79
  tpu_inference/layers/common/attention_metadata.py,sha256=St8ZatbY1D7xQACKJH459jMgp3oTP3AQ36mi9FZdrPU,850
79
80
  tpu_inference/layers/common/binary_search.py,sha256=ZQi-z1wG6WTcfVQXeTGOZokX4K1DSf9kCzqfrhEU8lk,12320
80
81
  tpu_inference/layers/common/quant_methods.py,sha256=mQSxZ44-QQtm22C_8ViejnP1cP2Dv6yc2YaP6oMKJeQ,185
81
- tpu_inference/layers/common/sharding.py,sha256=wBqdkXZSWfnnH8pkJtyW2DSqmAe_V4Vxi0iMPaXq0Z0,25185
82
+ tpu_inference/layers/common/sharding.py,sha256=_FTTPrbet069tHRSAaSg_nT3DYRM5x4pRLZPO7jwqUY,25410
82
83
  tpu_inference/layers/jax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
83
84
  tpu_inference/layers/jax/base.py,sha256=Vhts6ZMwNCZ8LbnEXeB0rl3nHdS5hDJWX7HEa7Fl7yE,5775
84
85
  tpu_inference/layers/jax/constants.py,sha256=NcYg0zAf3ClfP7YMYdYu_F1GngOzZaIxIAHBZDunKw4,2755
@@ -89,8 +90,8 @@ tpu_inference/layers/jax/rope_interface.py,sha256=X0SruXizlCHGnssFujC1pL07UC4Vsp
89
90
  tpu_inference/layers/jax/transformer_block.py,sha256=ufv-yfVDmRP_Ynrx3UX9xj-x0PkNw_tQ-0N0eYf4i7M,3917
90
91
  tpu_inference/layers/jax/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
92
  tpu_inference/layers/jax/attention/attention.py,sha256=DJFDkpQc9SDD156wVPFw3r2XaBgb44QNJ8OcdONaF5g,10085
92
- tpu_inference/layers/jax/attention/deepseek_v3_attention.py,sha256=YlagoBMwINv2KRH1dr4oEcH_cQ9QMPB55nO2FQZsWs0,14010
93
- tpu_inference/layers/jax/attention/gpt_oss_attention.py,sha256=rkrEv4aNZxtAGcXd1HXHUxhNeDNAd9nWTEZOKWSI8cA,8725
93
+ tpu_inference/layers/jax/attention/deepseek_v3_attention.py,sha256=02Zn5fIlV11QC0wx-prEp2gRxqo1YgTPlqleDKWcv_U,21086
94
+ tpu_inference/layers/jax/attention/gpt_oss_attention.py,sha256=A7V0M24p1yLKE3Gx7mIXEP7NG5tTAl5SxZLuDYIW6M0,8703
94
95
  tpu_inference/layers/jax/attention/llama4_attention.py,sha256=VvUmfBxQEbHf3F2BrcYDUnq5abj7CSDYeRsNx_eVAh0,6162
95
96
  tpu_inference/layers/jax/moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
96
97
  tpu_inference/layers/jax/moe/deepseek_v3_moe.py,sha256=Q6CuwwiZtWYm6iUee1wJoDJrwJE6_bcznTK2HrtXb0M,26089
@@ -102,17 +103,17 @@ tpu_inference/layers/jax/sample/sampling.py,sha256=C30KgmdOVSaagvHhbfLgVJtVQmJo8
102
103
  tpu_inference/layers/jax/sample/sampling_metadata.py,sha256=Gd835LNWfGM0NRQBVBqEv0nPwt5q9F4AdFym0CUS1fw,2561
103
104
  tpu_inference/layers/vllm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
104
105
  tpu_inference/layers/vllm/attention.py,sha256=wbJpcgqEAuIirv5PIULbiP-ggMKjmTanbB7Dg0BVYv4,7366
105
- tpu_inference/layers/vllm/fused_moe.py,sha256=XZt2CPUz00qZzDcyfBFz6buhVzmGL1amHalHJALl9zw,18945
106
- tpu_inference/layers/vllm/linear_common.py,sha256=_YlJtbdaYcck_j-gFLos_k0ycktVWxT8Qo57tR2YqJ8,7749
106
+ tpu_inference/layers/vllm/fused_moe.py,sha256=qGbQoCq-sdcZj_Q0kP6RzQk7_YvcX7FopkpLcerjNFM,17819
107
+ tpu_inference/layers/vllm/linear_common.py,sha256=AaI5fyivhsEtpIqOY3DKiS8ZxL62v8O4MYKn8E8eYoA,8579
107
108
  tpu_inference/layers/vllm/sharding.py,sha256=as7CF8UKTF3ToymwRY5Pi8uzwJk0P1sHPkWB5xEx3mA,9169
108
109
  tpu_inference/layers/vllm/quantization/__init__.py,sha256=SEppGayBzzQ5tsXLSy99aqilkAawQwYxnv2alCg6-ZU,1777
109
110
  tpu_inference/layers/vllm/quantization/awq.py,sha256=-8ZmjGvSKJB6_JuwSctNWt8xHWq4VSvK_AK9iahlgCo,8495
110
- tpu_inference/layers/vllm/quantization/common.py,sha256=wm3pge6XMTMsLK7_SSdgBP0PvQzz-1mrqN2I6xMqzrc,4218
111
- tpu_inference/layers/vllm/quantization/mxfp4.py,sha256=KwGoqIiPkd6FplGuYAKi4uX5A8MPlZqq99MVPchXyi4,11561
112
- tpu_inference/layers/vllm/quantization/unquantized.py,sha256=Q1v1ZbSIDmaoOg97Ehv6rA5CnSf6nTP40xDBMmHHeLw,15054
111
+ tpu_inference/layers/vllm/quantization/common.py,sha256=U3fm5rzQNmWa8i0dqx7Km8WZWVobulYi1I1RRokiJ-M,4324
112
+ tpu_inference/layers/vllm/quantization/mxfp4.py,sha256=9wdImu5zkMRzeraHxq_Wxkl2EZyxbHSGQaImCqEWEAg,12837
113
+ tpu_inference/layers/vllm/quantization/unquantized.py,sha256=KC_cCFDi4AmMp17tKokCq0cWlU-0zaXQzLYovUaZHok,15424
113
114
  tpu_inference/layers/vllm/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
- tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py,sha256=6idEyy3e849fZ1UeNvc9eSHYX7e6qvohrJa_d_D9MBk,5285
115
- tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=FM901QhyhJRC8CuMeICzCVVERvBHbhruRxYW0EQ570s,8820
115
+ tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py,sha256=LRR0mGtVm1k70-dsgUxtVdvDdoPYpfD-1w3WoljbYEo,5339
116
+ tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=4dd1kyLHp-xTLkuuH7oU-XAmd0L7vGuMv7dS8h0N0aU,9177
116
117
  tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
117
118
  tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py,sha256=6sQvsxiWdi5Vte8V9vrQ2abaqGqWpq-mtzU7lGAo-ac,8759
118
119
  tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py,sha256=4y7lYgybpXszpCAtxGFhR8LDEbEoCCeo3DfUSOXxhaQ,5202
@@ -121,54 +122,54 @@ tpu_inference/lora/torch_lora_ops.py,sha256=pr3N7DVfkn3ANijUC6dBoiCtIJW4fdJpKdC3
121
122
  tpu_inference/lora/torch_punica_tpu.py,sha256=qTnXZGLoOgvukSxeunO_SfpPTlkq9GlMj9H7zVYg9LE,12680
122
123
  tpu_inference/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
123
124
  tpu_inference/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
124
- tpu_inference/models/common/model_loader.py,sha256=3rRntyGqS6l7yAfURmRaGkhyIaee2E43a5F0_i0IFmE,18177
125
+ tpu_inference/models/common/model_loader.py,sha256=RKLSj4BnkaEZe4R0JG1L_ghX4VTgygm-u5aGGVJk9lM,21035
125
126
  tpu_inference/models/jax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
126
- tpu_inference/models/jax/deepseek_v3.py,sha256=SKOHVEC-_2NLxBnzBzbu5tu0d6FTlAEiI1EefGaO2QE,40047
127
- tpu_inference/models/jax/gpt_oss.py,sha256=Vw4LRB5Kp6hbA2hjZGFS8kiEqOCjf881XH2JNtu2S1I,20924
127
+ tpu_inference/models/jax/deepseek_v3.py,sha256=rsGOV6b_EnOxMwz3r2s508g_fhdLJ2hj2EwP2mN0A8I,45967
128
+ tpu_inference/models/jax/gpt_oss.py,sha256=IvTAKtdJIlBpvYUcPWfwPDyOIVd4ci6eNkoV5AEUia4,20930
128
129
  tpu_inference/models/jax/jax_intermediate_tensor.py,sha256=Pxu1PCV5LN5X58aYVkPiohcXZIeKVim2oqvrS_cVgw4,2604
129
130
  tpu_inference/models/jax/llama3.py,sha256=ZiFtrpAzXTT9vAPES9UeuJInCWGbvDWs7g0_JLdCCa4,13479
130
131
  tpu_inference/models/jax/llama4.py,sha256=wf2Sp2iYViaYD5rSfv3_ryO6gYuYM5XaOyvghaP4OCY,29631
131
- tpu_inference/models/jax/llama_eagle3.py,sha256=xUoNetxDbcFIEVLZ2DiD-GEQhHcdau2v1R12WdMyGec,12550
132
+ tpu_inference/models/jax/llama_eagle3.py,sha256=7-U99yvBkle-FSZ3NDDI-obWSQ2Fo2OTOi1H67H4jxY,12476
132
133
  tpu_inference/models/jax/llama_guard_4.py,sha256=LrnU2zBWM0s4q_5dwmR--OO0V7ttltsYhrHYlBgQVIw,15275
133
134
  tpu_inference/models/jax/qwen2.py,sha256=SuAp7tErk8OoIRko0Vt6QSOZP_9B9r5GTfqmVfImUIo,13410
134
- tpu_inference/models/jax/qwen2_5_vl.py,sha256=tf177ypgA1ZVIn34Ff_LTwr10NwzlZ3-DPqSoRLAQtQ,43995
135
+ tpu_inference/models/jax/qwen2_5_vl.py,sha256=WUOmqNE6fHQ8PGU85Y8Bt6-CtCC1Uubbox_9FdpDMMo,49833
135
136
  tpu_inference/models/jax/qwen3.py,sha256=CIZQKjZDke_LPGsLNhRCJdDTzWueUneBPAQ1blS24IM,11050
136
137
  tpu_inference/models/jax/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
137
138
  tpu_inference/models/jax/utils/file_utils.py,sha256=NOuSC3YFnZpf3CZgYdghbbiNYJt42zgjlEYbOZIVct4,2840
138
139
  tpu_inference/models/jax/utils/multi_modal_utils.py,sha256=rrIrQWidkUnGilBHKNpdYh7_2BkvnAaqanXjC81GNcg,6156
139
- tpu_inference/models/jax/utils/weight_utils.py,sha256=d5u8pPR-qPbEjX-8BMY0Zea9O-a34CpfuDlVnbwWfAw,20659
140
+ tpu_inference/models/jax/utils/weight_utils.py,sha256=JXclW_ioRlizTBHweDj8Ml-ybv5HT-cucMf4w_0AgIc,21678
140
141
  tpu_inference/models/jax/utils/quantization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
141
142
  tpu_inference/models/jax/utils/quantization/mxfp4_utils.py,sha256=boGnqJCRIOf5nedAxQ8_IUTV6Rfll10DXnRC40BeeE8,3682
142
- tpu_inference/models/jax/utils/quantization/quantization_utils.py,sha256=xgKoKB7AM3TYPxzVgEGLTK9ebQH2Kx8mNuO0heovkmk,26778
143
+ tpu_inference/models/jax/utils/quantization/quantization_utils.py,sha256=0ASVjrqYiyX27-U71OMxCMkqSln-s8H9rZlPCfFD_qI,26586
143
144
  tpu_inference/models/jax/utils/quantization/configs/fp8_all_modules_w_only.yaml,sha256=d_YHPtaRJ_7PBrPijSzJGnVeoJO62tKIGqrgFqpYT1k,137
144
145
  tpu_inference/models/jax/utils/quantization/configs/fp8_default.yaml,sha256=b7SyL75HuSTj3fN9_ZLCK_CDiccL5DGq_DddGmxj_qk,170
145
146
  tpu_inference/models/jax/utils/quantization/configs/int8_all_modules_w_only.yaml,sha256=0Qwij71zj9k6rmrUNd8Q5df9YYfkoJ1ZkgMAHxQy81k,128
146
147
  tpu_inference/models/jax/utils/quantization/configs/int8_default.yaml,sha256=lGec0UwwxmNPNgKPSsTsCMSXNJjhw507KMtM2NsSCMw,152
147
148
  tpu_inference/models/vllm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
148
- tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=hEjg5hKotp-fEt3SXWkWpdnQ32TU1XGpTrfhyLTNyt0,12054
149
+ tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=3EcaD_1vZuyAZBfDtm5u_qfCahQU28qR4rAUraNAFqs,12305
149
150
  tpu_inference/models/vllm/vllm_model_wrapper_context.py,sha256=yxlJHPmRQIAwlb1MmHK3xfXokgIkJ-evNU4PgyoJUdg,1187
150
151
  tpu_inference/platforms/__init__.py,sha256=lQCrKddS_GcGpCbeogvz9zOZD1mQw5bBsiw8On46qFQ,74
151
- tpu_inference/platforms/tpu_platform.py,sha256=RSCe3Ne1FsWXVrX6_6V_Z6B0TDTRS38eM0KTkXbQ_w8,10579
152
+ tpu_inference/platforms/tpu_platform.py,sha256=naS-yuZXi16hAkQ5n-Leru0y-GeIzX0WbsCTM0AGACk,9629
152
153
  tpu_inference/runner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
153
154
  tpu_inference/runner/block_table.py,sha256=K3Ic8EgPM08d_C5nEN60mxoRydlaQWySAemf_8Q_qVw,4175
154
- tpu_inference/runner/compilation_manager.py,sha256=oVML1KhhQ7YFaSWBaJA0qWQoNX2qRZOrwbbh4XYPc-8,37287
155
+ tpu_inference/runner/compilation_manager.py,sha256=tWzyNBMYFk-G6hRMyos5aEG4k_YgXUyD-VIOWeC1Da4,40423
155
156
  tpu_inference/runner/input_batch.py,sha256=bx221NX2IOWzrtopss-B-2ZKW4y-U6nQpG09PjpUziw,18273
156
- tpu_inference/runner/kv_cache.py,sha256=F4dzW2d53xuxkFUn0oKzwE6VklGUeVm-QM19NVfIQDU,4577
157
- tpu_inference/runner/kv_cache_manager.py,sha256=XEfis_9nQAz8uxM5y_P5biqSUijX4IeMhIusTf2V7vg,22444
157
+ tpu_inference/runner/kv_cache.py,sha256=e3sfOJRuaLC7WVX0V_lFTxo-Z5pQf3CAnAgGy707YxY,5471
158
+ tpu_inference/runner/kv_cache_manager.py,sha256=F2oVejiCaPBNDJAO0y6FYv6YyPsKckVRE--Zhv4zXo0,22842
158
159
  tpu_inference/runner/lora_utils.py,sha256=B4xMCgXGJ4VNdePvn89HH3tIZ-gYsQ7Vq_YCiYIATEY,3843
159
160
  tpu_inference/runner/multimodal_manager.py,sha256=azEPdHOwz8CN11MQmorGdtrCLbFaTCxdWyuEsZTzjYM,9778
160
- tpu_inference/runner/persistent_batch_manager.py,sha256=KERSfKy6XjMejnbtPGI3hzoYAHJLeCxmpZVYPqBCago,11156
161
+ tpu_inference/runner/persistent_batch_manager.py,sha256=Otu67vOTf1_HKAMZgPDDHlRvvZ3YVJdz-QderH4qOII,13263
161
162
  tpu_inference/runner/speculative_decoding_manager.py,sha256=I3FDWKh2dn6nV8LgTGfCTwMKYnxQsTPpBIrmaJngXHs,10215
162
- tpu_inference/runner/structured_decoding_manager.py,sha256=Y0ERPhj4olFh6Y2TxP0R1_4UIJwy7nemYA-h63YIR2U,3622
163
- tpu_inference/runner/tpu_runner.py,sha256=aHXHSlaNuc9q7pcPklqTFRkmkEQDULEEH_hsR_NcTMQ,77532
164
- tpu_inference/runner/utils.py,sha256=ZnWUoNo-7INeB0mdXti1jwUOdbmxyExznOs-crRTQLk,17126
163
+ tpu_inference/runner/structured_decoding_manager.py,sha256=gZQKQUFxh6xYYH9eGTdbguqk8hc2WwTrIdMMuCcbymE,3573
164
+ tpu_inference/runner/tpu_runner.py,sha256=yjY9dGpHku6mhSjD113viNPwvrmBXwiRvzhj2RrvI_E,79834
165
+ tpu_inference/runner/utils.py,sha256=lKqL5nxGTk7ufzJRNdp4udn2bPu3jIX52W7akXgSrHc,17133
165
166
  tpu_inference/spec_decode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
166
167
  tpu_inference/spec_decode/jax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
167
- tpu_inference/spec_decode/jax/eagle3.py,sha256=1WVHTdv6jfCKwbiz0RwQLPyq8L720gD_bs0p_Gz0QiI,16644
168
+ tpu_inference/spec_decode/jax/eagle3.py,sha256=FxP0uWeQlHlgCpt1nY3FUd4lKlegKJljHyc05jJucaQ,19104
168
169
  tpu_inference/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
169
- tpu_inference/worker/tpu_worker.py,sha256=aojB9-PY_ZzTaZgv1i5PUB9CSXNVuK4JZzftCv9ku4A,20642
170
- tpu_inference-0.11.1.dev202511220812.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
171
- tpu_inference-0.11.1.dev202511220812.dist-info/METADATA,sha256=JzmyOlYYkImIe_WSawI0LDwL28xS-0SCRCcFXeYSV0g,5465
172
- tpu_inference-0.11.1.dev202511220812.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
173
- tpu_inference-0.11.1.dev202511220812.dist-info/top_level.txt,sha256=gb1hRIQ3DOawUfVzvPL2E__2KPIl9I0vb5r0xcRBGYQ,20
174
- tpu_inference-0.11.1.dev202511220812.dist-info/RECORD,,
170
+ tpu_inference/worker/tpu_worker.py,sha256=ygpjBeSRd7iz6Upf2NxuExsyHhHJFdCgdUL0SN2HRTE,20615
171
+ tpu_inference-0.12.0.dev20251213.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
172
+ tpu_inference-0.12.0.dev20251213.dist-info/METADATA,sha256=Uy87CPn-1EfirI5J77oxS-d-ABvxxynki53m68sVIuo,5767
173
+ tpu_inference-0.12.0.dev20251213.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
174
+ tpu_inference-0.12.0.dev20251213.dist-info/top_level.txt,sha256=gb1hRIQ3DOawUfVzvPL2E__2KPIl9I0vb5r0xcRBGYQ,20
175
+ tpu_inference-0.12.0.dev20251213.dist-info/RECORD,,