tpu-inference 0.0.1rc1__py3-none-any.whl → 0.11.1.dev202511130813__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tpu-inference might be problematic. Click here for more details.

Files changed (67) hide show
  1. tests/kernels/fused_moe_v1_test.py +34 -303
  2. tests/kernels/ragged_paged_attention_kernel_v3_hd64_test.py +2 -2
  3. tests/lora/test_layers.py +6 -0
  4. tests/lora/utils.py +8 -0
  5. tests/test_utils.py +16 -24
  6. tpu_inference/__init__.py +3 -22
  7. tpu_inference/core/core_tpu.py +9 -17
  8. tpu_inference/core/disagg_utils.py +8 -6
  9. tpu_inference/distributed/tpu_connector.py +4 -3
  10. tpu_inference/distributed/utils.py +2 -3
  11. tpu_inference/envs.py +8 -61
  12. tpu_inference/executors/ray_distributed_executor.py +11 -31
  13. tpu_inference/kernels/fused_moe/v1/kernel.py +110 -641
  14. tpu_inference/kernels/ragged_paged_attention/v3/kernel.py +54 -77
  15. tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py +143 -287
  16. tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes.py +0 -7
  17. tpu_inference/layers/jax/attention/attention.py +1 -1
  18. tpu_inference/layers/{common → jax}/attention_interface.py +2 -8
  19. tpu_inference/layers/jax/sample/rejection_sampler.py +1 -1
  20. tpu_inference/layers/jax/sample/sampling.py +2 -2
  21. tpu_inference/layers/{common → jax}/sharding.py +5 -5
  22. tpu_inference/layers/vllm/attention.py +1 -1
  23. tpu_inference/layers/vllm/fused_moe.py +208 -170
  24. tpu_inference/layers/vllm/quantization/__init__.py +3 -7
  25. tpu_inference/layers/vllm/quantization/awq.py +3 -4
  26. tpu_inference/layers/vllm/quantization/common.py +1 -6
  27. tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py +2 -4
  28. tpu_inference/layers/vllm/quantization/unquantized.py +67 -62
  29. tpu_inference/layers/vllm/sharding.py +2 -2
  30. tpu_inference/lora/torch_punica_tpu.py +2 -1
  31. tpu_inference/mock/__init__.py +0 -0
  32. tpu_inference/mock/vllm_config_utils.py +28 -0
  33. tpu_inference/mock/vllm_envs.py +1219 -0
  34. tpu_inference/mock/vllm_logger.py +212 -0
  35. tpu_inference/mock/vllm_logging_utils.py +15 -0
  36. tpu_inference/models/common/model_loader.py +12 -46
  37. tpu_inference/models/jax/llama3.py +3 -4
  38. tpu_inference/models/jax/llama_eagle3.py +5 -8
  39. tpu_inference/models/jax/phi3.py +376 -0
  40. tpu_inference/models/jax/qwen2.py +2 -3
  41. tpu_inference/models/jax/qwen2_5_vl.py +50 -165
  42. tpu_inference/models/jax/qwen3.py +2 -3
  43. tpu_inference/models/jax/utils/quantization/quantization_utils.py +6 -3
  44. tpu_inference/models/jax/utils/weight_utils.py +143 -198
  45. tpu_inference/models/vllm/vllm_model_wrapper.py +14 -32
  46. tpu_inference/platforms/tpu_platform.py +34 -47
  47. tpu_inference/runner/compilation_manager.py +60 -145
  48. tpu_inference/runner/kv_cache.py +2 -2
  49. tpu_inference/runner/kv_cache_manager.py +18 -17
  50. tpu_inference/runner/persistent_batch_manager.py +2 -40
  51. tpu_inference/runner/structured_decoding_manager.py +3 -2
  52. tpu_inference/runner/tpu_runner.py +135 -283
  53. tpu_inference/runner/utils.py +2 -2
  54. tpu_inference/spec_decode/jax/eagle3.py +21 -71
  55. tpu_inference/tpu_info.py +3 -4
  56. tpu_inference/utils.py +15 -38
  57. tpu_inference/worker/tpu_worker.py +26 -163
  58. {tpu_inference-0.0.1rc1.dist-info → tpu_inference-0.11.1.dev202511130813.dist-info}/METADATA +3 -4
  59. {tpu_inference-0.0.1rc1.dist-info → tpu_inference-0.11.1.dev202511130813.dist-info}/RECORD +63 -61
  60. tests/test_envs.py +0 -203
  61. tpu_inference/layers/common/quant_methods.py +0 -8
  62. tpu_inference/layers/vllm/quantization/mxfp4.py +0 -331
  63. tpu_inference/models/jax/llama_guard_4.py +0 -361
  64. /tpu_inference/layers/{common → jax}/binary_search.py +0 -0
  65. {tpu_inference-0.0.1rc1.dist-info → tpu_inference-0.11.1.dev202511130813.dist-info}/WHEEL +0 -0
  66. {tpu_inference-0.0.1rc1.dist-info → tpu_inference-0.11.1.dev202511130813.dist-info}/licenses/LICENSE +0 -0
  67. {tpu_inference-0.0.1rc1.dist-info → tpu_inference-0.11.1.dev202511130813.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tpu_inference
3
- Version: 0.0.1rc1
3
+ Version: 0.11.1.dev202511130813
4
4
  Author: tpu_inference Contributors
5
5
  Classifier: Development Status :: 3 - Alpha
6
6
  Classifier: Intended Audience :: Developers
@@ -14,7 +14,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
14
  Requires-Python: >=3.10
15
15
  Description-Content-Type: text/markdown
16
16
  License-File: LICENSE
17
- Requires-Dist: tpu-info==0.7.1
17
+ Requires-Dist: tpu-info==0.4.0
18
18
  Requires-Dist: yapf==0.43.0
19
19
  Requires-Dist: pytest
20
20
  Requires-Dist: pytest-mock
@@ -27,11 +27,10 @@ Requires-Dist: jaxtyping
27
27
  Requires-Dist: flax==0.11.1
28
28
  Requires-Dist: torchax==0.0.7
29
29
  Requires-Dist: qwix==0.1.1
30
- Requires-Dist: torchvision==0.24.0
30
+ Requires-Dist: torchvision==0.23.0
31
31
  Requires-Dist: pathwaysutils
32
32
  Requires-Dist: parameterized
33
33
  Requires-Dist: numba==0.62.1
34
- Requires-Dist: runai-model-streamer[gcs,s3]==0.15.0
35
34
  Dynamic: author
36
35
  Dynamic: classifier
37
36
  Dynamic: description
@@ -1,9 +1,8 @@
1
1
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  tests/test_base.py,sha256=Ct5WFRMHL7IHEIxk8FrzAvO8m0xFuDpzDBKkAKKAL2Q,7341
3
- tests/test_envs.py,sha256=h502VxL2gvhECm8u5uDh5JTGvhFf_DfQO88SpqOFMzE,7135
4
3
  tests/test_quantization.py,sha256=IT5ASyS1uuWcxc22kRtBcA-V4j3Z3hb7pMztm3GOlBs,34445
5
4
  tests/test_tpu_info.py,sha256=ZrwlMsp8ffITkS_b8Q1t_QG-a-WVAd4NUcjHhGibcsI,4670
6
- tests/test_utils.py,sha256=GIXLdd-x4gnqSLrySXGk22phqPc8MegFd7ph1Jj8OcU,8182
5
+ tests/test_utils.py,sha256=szRg4UB36RcgIvbEd9xMhKYbWi-O4XAUWGJlIU6FJ9E,7983
7
6
  tests/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
7
  tests/core/test_core_tpu.py,sha256=r496rk1eOsK_F4nvm9zprl_T-RcO6eCUb7LuVReOZno,21413
9
8
  tests/core/test_disagg_executor.py,sha256=QdE2YZs08EyDDCmSjhiXkXqQ9BJTgO6csr_E1xkkfSg,2256
@@ -11,37 +10,37 @@ tests/core/test_disagg_utils.py,sha256=alktTGppaGdg-_un0Amz8Y0IDQz-xNJN0dXG-YApE
11
10
  tests/core/test_dp_scheduler.py,sha256=IwCR1Vs96V4CQdWA051rNaYxxr2V_byA1yx9HWyRoMg,37339
12
11
  tests/core/test_init.py,sha256=NEFI5A9eKGu4rmeJ2iqd0EmhlA3bzbVkXmMi1PV1b9U,1687
13
12
  tests/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- tests/kernels/fused_moe_v1_test.py,sha256=sQ6gvpti94fpPYrSZn7frPPNjqbVmRibFtenVrGGCA4,10403
13
+ tests/kernels/fused_moe_v1_test.py,sha256=c6zbSHQDzOseeyL9VCjQeP7zayNnwYf059CPlKcvZzQ,3137
15
14
  tests/kernels/mla_v1_test.py,sha256=oZc4TCgquiG0KOeWfv46yJbUIpro_CgCMFc7vzyB7t8,11646
16
15
  tests/kernels/quantized_matmul_kernel_test.py,sha256=od5-zXFjcsc_gWGRDrREL8E_ftymNniQVTzgtkBo_Gc,5679
17
16
  tests/kernels/ragged_kv_cache_update_v2_test.py,sha256=6-HjP5CoUG-kcuP8MS-JJVMiBnPRo_zadS3VInnO0D4,10821
18
17
  tests/kernels/ragged_paged_attention_kernel_v2_test.py,sha256=pWqo9UYF0tzwgBKO_xYw-TYSPrtAsKcMK5Haj8hFG7I,11340
19
- tests/kernels/ragged_paged_attention_kernel_v3_hd64_test.py,sha256=vLfe1I_vLdf0SqtBuBL7QHLSklrhWOOzYF-I_I3rdNo,16309
18
+ tests/kernels/ragged_paged_attention_kernel_v3_hd64_test.py,sha256=JhIElqUZIRqIsfQ3U1RUzSiH_gz_SabAqDosGGZ2tlA,16321
20
19
  tests/kernels/ragged_paged_attention_kernel_v3_test.py,sha256=Hrd8iUkS1pS3rxeTyY53aYRg_ZL_d3NqgBXvOgnigSU,14838
21
20
  tests/lora/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
21
  tests/lora/conftest.py,sha256=EXjwE1CjmUUlMEXpyE3UwxvgrKUllE73I8BNKfP1FTc,984
23
22
  tests/lora/test_bgmv.py,sha256=gQxWsJdNX2nkrE2xyrG0exwf3E2eHm2k2nkEXoANuQc,1359
24
- tests/lora/test_layers.py,sha256=6B4HhMAItQmt0hPAQgyXgwSYs7b3bIbUf6LaPsqXLzY,25923
23
+ tests/lora/test_layers.py,sha256=21ekYlsK36r1GPZOfzs7E-KIsfI1JcuZl1E6vaQbHf4,26273
25
24
  tests/lora/test_lora.py,sha256=wJiF1P1BDnPN8TLX2tlFtdZ_QCkV-S9nPl6_uR6DqFc,4439
26
- tests/lora/utils.py,sha256=rY0tDZEZe58ye4-ykwrTnsiWuLcaEG57N_Rua90bDXI,2726
27
- tpu_inference/__init__.py,sha256=p4MaepRdN7723FUNE-3pOMxZWjFn4_TVFgjrNyty4JE,2304
25
+ tests/lora/utils.py,sha256=dR_v1H20vPVjFHdBhDajWOz0WJZlKuPLgMFQsME0LtA,3009
26
+ tpu_inference/__init__.py,sha256=7IduGWw-_fwx0VA6EvC_AqHF67fnnShz6YvkqCfvFx8,1317
28
27
  tpu_inference/env_override.py,sha256=pmL7lfs_rGCP92ya3wuWuudsCYeOMZ6tFZY82A4KkQc,365
29
- tpu_inference/envs.py,sha256=ugze6VdQ_hG1IxUCbcgXZq7a22fZ-Lora3V_fkFOefw,5714
28
+ tpu_inference/envs.py,sha256=MTT_Pdtd6cAcciYjv1OekEmvspaq3SYL0oR_jDkQ_aE,3948
30
29
  tpu_inference/logger.py,sha256=HQCz7NefmbturuhOC7-3Ixbtcdgoz4g9FHh2RB6o8cc,334
31
- tpu_inference/tpu_info.py,sha256=3iilHRQSFjwMJwhKcuuawTm7mhwkgHbj4zi6CiAySrs,2265
32
- tpu_inference/utils.py,sha256=mHbjI8fxInPxagLsSUg-R3DzSz-X7WYNdoorPYoE3hg,10855
30
+ tpu_inference/tpu_info.py,sha256=9UohshkndR6dZpGWpWXfTD4qvIVdVgHf0yOoSEkLTrw,2276
31
+ tpu_inference/utils.py,sha256=LWEshJgUdB20H2fDA-QI-Sk4EP7PD_FWvW3Mrqb-k8M,10054
33
32
  tpu_inference/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
- tpu_inference/core/core_tpu.py,sha256=WDD3koE_j1QhWS2BbMA2aQOZayPZm4tYPvzL4YCX2jY,33294
33
+ tpu_inference/core/core_tpu.py,sha256=JdN4-xaxSWnzY4T181SCnbZ5HEnwQ5IifYA9ybF4pWo,32710
35
34
  tpu_inference/core/disagg_executor.py,sha256=HZpgYMVxRxm0RQxO4l8IDYBWJ6Z3Tac6xavc5otcirc,4657
36
- tpu_inference/core/disagg_utils.py,sha256=lv8MAVoAjtcmTaenUXVokg2q3d0tzsma86UiQlQ3omY,1492
35
+ tpu_inference/core/disagg_utils.py,sha256=ufWNFWQ5n4YnZpPOtoReHlYo4dlN7AbIqCyqS4an0t4,1572
37
36
  tpu_inference/core/sched/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
37
  tpu_inference/core/sched/dp_scheduler.py,sha256=mKs8Ms46szdlBfo8hjdqis2ZKAZbcKnHAGfEr0X5R8g,22527
39
38
  tpu_inference/distributed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
39
  tpu_inference/distributed/jax_parallel_state.py,sha256=5_xCwcL03lFPUoSO_OP7hIVKpUFroW1m-jVO7R6FbUc,2223
41
- tpu_inference/distributed/tpu_connector.py,sha256=kLaTwy6BrAThJeFkd1soJ47bBo5iGp4GjUJs7xFx4Tg,29696
42
- tpu_inference/distributed/utils.py,sha256=1KIREn28Zg10O-MSUkVQMRzS09WoGc_VLGOX4QTFJac,1504
40
+ tpu_inference/distributed/tpu_connector.py,sha256=Zah46Sm5iOuh72SzXw69NxMc0MLnqsLEpe2BfDhpnqA,29731
41
+ tpu_inference/distributed/utils.py,sha256=RwFQi8G4TzN1g9RjQu0pb5JxSc_jhoIZVsFJo0uHjxo,1513
43
42
  tpu_inference/executors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- tpu_inference/executors/ray_distributed_executor.py,sha256=9CnzWb8aurH1B0tJfMHB73F-RQBGqSf5DnymetBvZ5o,16225
43
+ tpu_inference/executors/ray_distributed_executor.py,sha256=UgJP-XSgDPKDj_mkVQ16XrRN96juVpnFl6fdWEyFL_Y,15249
45
44
  tpu_inference/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
45
  tpu_inference/experimental/llama3_jax_stashed.py,sha256=YK1oSIfto9ALo-HB45XfSrbq9XgVbE4m2C-9zRwmSzI,10913
47
46
  tpu_inference/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -53,7 +52,7 @@ tpu_inference/kernels/flash_attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
53
52
  tpu_inference/kernels/flash_attention/kernel.py,sha256=n8gmAFVfchMXlyaSEj8xXJm6AadFt26edQihPRdithY,25897
54
53
  tpu_inference/kernels/fused_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
54
  tpu_inference/kernels/fused_moe/v1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
- tpu_inference/kernels/fused_moe/v1/kernel.py,sha256=xVXfclgbw_3U7c5W1azDFkFDK5FolBzDN9IL0rIzLQs,62813
55
+ tpu_inference/kernels/fused_moe/v1/kernel.py,sha256=QHB0QEvC3x_6zhwz06JQpaOncQcNAhOSV92dD5tGVq8,40869
57
56
  tpu_inference/kernels/mla/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
57
  tpu_inference/kernels/mla/v1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
58
  tpu_inference/kernels/mla/v1/kernel.py,sha256=dw1nhpL47uQxMFOIN2kENC6aITbalT81YZLAyr1usLU,51571
@@ -67,28 +66,27 @@ tpu_inference/kernels/ragged_paged_attention/v2/kernel.py,sha256=OiQGAHhyggbp1Pe
67
66
  tpu_inference/kernels/ragged_paged_attention/v2/ragged_kv_cache_update.py,sha256=vGp2ZWODTbjyG9z2z0Qf_BX-wYHd5bUybnc_DtOz0nI,10995
68
67
  tpu_inference/kernels/ragged_paged_attention/v2/tuned_block_sizes.py,sha256=mw80bXBGenroGdrITV0F_EaI2s-Z9KWwqU9WodvJg14,97919
69
68
  tpu_inference/kernels/ragged_paged_attention/v3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
- tpu_inference/kernels/ragged_paged_attention/v3/kernel.py,sha256=O179Fft5KpuN5LIFx3SghWXJJUqh3Og-xqfO4Z8QXYU,57032
71
- tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py,sha256=1ysmx7awuSZUnR7TcyUkARAvyMxNQS-9XRFMYnadZvk,61195
72
- tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes.py,sha256=k3LwduhZO85cJ-pSgnGN0c2Nn8eNeQq4eA94KUXJzMw,142198
69
+ tpu_inference/kernels/ragged_paged_attention/v3/kernel.py,sha256=tlP6121yfXaukx_RQroHlHcZnbKPyyum0lAcvT0B_Pk,56132
70
+ tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py,sha256=DFVdIIKmyufu_4b-3YhxI56jt0O1cJ3JsVl-2DDZHv4,55350
71
+ tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes.py,sha256=leTS75aq99N1Zuv6wB5yLdkfYnEtrBDVI4z_jOKnjL0,142012
73
72
  tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes_hd64.py,sha256=P3_ivi8iUz5QMU_3pgpl4Bkbmn0q0NpDtVJX39haRQA,11208
74
73
  tpu_inference/kernels/ragged_paged_attention/v3/util.py,sha256=1N_ozjKboDYLteFJndWoLXNudj2z53rGXMkELa5Z9tY,1102
75
74
  tpu_inference/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
75
  tpu_inference/layers/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
- tpu_inference/layers/common/attention_interface.py,sha256=SQZ-1I32Jqg7GGI-z4BVibXbaitJHyTs26X3B5nBRVo,13369
78
76
  tpu_inference/layers/common/attention_metadata.py,sha256=St8ZatbY1D7xQACKJH459jMgp3oTP3AQ36mi9FZdrPU,850
79
- tpu_inference/layers/common/binary_search.py,sha256=ZQi-z1wG6WTcfVQXeTGOZokX4K1DSf9kCzqfrhEU8lk,12320
80
- tpu_inference/layers/common/quant_methods.py,sha256=mQSxZ44-QQtm22C_8ViejnP1cP2Dv6yc2YaP6oMKJeQ,185
81
- tpu_inference/layers/common/sharding.py,sha256=sjbwkDr2fP26Ob8f5cSDeDifr3eWFZMDHU4MKr7pIgQ,25217
82
77
  tpu_inference/layers/jax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
+ tpu_inference/layers/jax/attention_interface.py,sha256=1jlvSZWaP6DuPVtb1W_KPw4-Qi68BikOBNLLcpygupY,13221
83
79
  tpu_inference/layers/jax/base.py,sha256=Vhts6ZMwNCZ8LbnEXeB0rl3nHdS5hDJWX7HEa7Fl7yE,5775
80
+ tpu_inference/layers/jax/binary_search.py,sha256=ZQi-z1wG6WTcfVQXeTGOZokX4K1DSf9kCzqfrhEU8lk,12320
84
81
  tpu_inference/layers/jax/constants.py,sha256=NcYg0zAf3ClfP7YMYdYu_F1GngOzZaIxIAHBZDunKw4,2755
85
82
  tpu_inference/layers/jax/layers.py,sha256=yv_lC2tbJuzVL-OaXYooX82Ys8hWZATeH9M78coJ3VI,10633
86
83
  tpu_inference/layers/jax/misc.py,sha256=znKv1Nuq_LgYpaIu0qlzUVDgQWnjjG7aqPJGM8kuwcw,566
87
84
  tpu_inference/layers/jax/rope.py,sha256=i2E7pRLWgOaFLbeo8_phZwKQWJW7ohAyl69E2V2Mc2U,11349
88
85
  tpu_inference/layers/jax/rope_interface.py,sha256=X0SruXizlCHGnssFujC1pL07UC4Vsp7-gdBy_Q7JZhI,8375
86
+ tpu_inference/layers/jax/sharding.py,sha256=wBqdkXZSWfnnH8pkJtyW2DSqmAe_V4Vxi0iMPaXq0Z0,25185
89
87
  tpu_inference/layers/jax/transformer_block.py,sha256=ufv-yfVDmRP_Ynrx3UX9xj-x0PkNw_tQ-0N0eYf4i7M,3917
90
88
  tpu_inference/layers/jax/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
- tpu_inference/layers/jax/attention/attention.py,sha256=DJFDkpQc9SDD156wVPFw3r2XaBgb44QNJ8OcdONaF5g,10085
89
+ tpu_inference/layers/jax/attention/attention.py,sha256=bWXMtF8TToiRyZ3SvJGQnD2urZTbuX_omHVXpQdn0fs,10082
92
90
  tpu_inference/layers/jax/attention/deepseek_v3_attention.py,sha256=YlagoBMwINv2KRH1dr4oEcH_cQ9QMPB55nO2FQZsWs0,14010
93
91
  tpu_inference/layers/jax/attention/gpt_oss_attention.py,sha256=rkrEv4aNZxtAGcXd1HXHUxhNeDNAd9nWTEZOKWSI8cA,8725
94
92
  tpu_inference/layers/jax/attention/llama4_attention.py,sha256=VvUmfBxQEbHf3F2BrcYDUnq5abj7CSDYeRsNx_eVAh0,6162
@@ -97,78 +95,82 @@ tpu_inference/layers/jax/moe/deepseek_v3_moe.py,sha256=Q6CuwwiZtWYm6iUee1wJoDJrw
97
95
  tpu_inference/layers/jax/moe/gpt_oss_moe.py,sha256=Rx5b1jg2XMm7Xx9hrjgvyhscaJ_zGbVMHmeEiLh7kIQ,6196
98
96
  tpu_inference/layers/jax/moe/moe.py,sha256=cA8R1rjbBwNEoNlsPWjeIBB9nvaRDwlEdwQTVg6lTpY,8762
99
97
  tpu_inference/layers/jax/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
100
- tpu_inference/layers/jax/sample/rejection_sampler.py,sha256=nI5s0E73xkqDIu2hTljIXt23B1Q-gRnC1myoQpGDJrQ,20426
101
- tpu_inference/layers/jax/sample/sampling.py,sha256=C30KgmdOVSaagvHhbfLgVJtVQmJo86CbHPa4h36Vn70,3314
98
+ tpu_inference/layers/jax/sample/rejection_sampler.py,sha256=IRfVWjkbVXp9Sv1YrGMMh-LYx1AwbY-3FTXEO1-Ue9g,20423
99
+ tpu_inference/layers/jax/sample/sampling.py,sha256=dVOcMdmPdAEsupPk96tCaZecIWUiDej0DiVnwaH9ckQ,3308
102
100
  tpu_inference/layers/jax/sample/sampling_metadata.py,sha256=Gd835LNWfGM0NRQBVBqEv0nPwt5q9F4AdFym0CUS1fw,2561
103
101
  tpu_inference/layers/vllm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
104
- tpu_inference/layers/vllm/attention.py,sha256=wbJpcgqEAuIirv5PIULbiP-ggMKjmTanbB7Dg0BVYv4,7366
105
- tpu_inference/layers/vllm/fused_moe.py,sha256=qGbQoCq-sdcZj_Q0kP6RzQk7_YvcX7FopkpLcerjNFM,17819
102
+ tpu_inference/layers/vllm/attention.py,sha256=JxEQ8ql_97zbQzukIbfUYq50-2k81VUG1Km_YV_RUtg,7363
103
+ tpu_inference/layers/vllm/fused_moe.py,sha256=XZt2CPUz00qZzDcyfBFz6buhVzmGL1amHalHJALl9zw,18945
106
104
  tpu_inference/layers/vllm/linear_common.py,sha256=_YlJtbdaYcck_j-gFLos_k0ycktVWxT8Qo57tR2YqJ8,7749
107
- tpu_inference/layers/vllm/sharding.py,sha256=as7CF8UKTF3ToymwRY5Pi8uzwJk0P1sHPkWB5xEx3mA,9169
108
- tpu_inference/layers/vllm/quantization/__init__.py,sha256=SEppGayBzzQ5tsXLSy99aqilkAawQwYxnv2alCg6-ZU,1777
109
- tpu_inference/layers/vllm/quantization/awq.py,sha256=-8ZmjGvSKJB6_JuwSctNWt8xHWq4VSvK_AK9iahlgCo,8495
110
- tpu_inference/layers/vllm/quantization/common.py,sha256=8XD64pPa077c9HThFhLFVHlDL9YBafnYwp6rp6gR44E,4432
111
- tpu_inference/layers/vllm/quantization/mxfp4.py,sha256=UT6gpMrH27CusdGUMqEvQpJg1CPvsvnqAe0GKfZdV6o,13596
112
- tpu_inference/layers/vllm/quantization/unquantized.py,sha256=YaZdO_XjT06U1gtsUgNVSF1BrFqc4sCGO0dgtprUtwM,14395
105
+ tpu_inference/layers/vllm/sharding.py,sha256=WTx1tF_7R99AdyE-lL7HQJ378hAafeI-JVRsugAvwn4,9177
106
+ tpu_inference/layers/vllm/quantization/__init__.py,sha256=Tz44kUZTdNFu5Dmu48aQ-9f7ioWjbUWS0eVYURXZ17E,1535
107
+ tpu_inference/layers/vllm/quantization/awq.py,sha256=ar8x1CPTPvfcf4wbuBC1XVh4pjtSUchoYWnbkZKH3CQ,8412
108
+ tpu_inference/layers/vllm/quantization/common.py,sha256=wm3pge6XMTMsLK7_SSdgBP0PvQzz-1mrqN2I6xMqzrc,4218
109
+ tpu_inference/layers/vllm/quantization/unquantized.py,sha256=id6d_IZIhDIvmaH3ANtmLiy4U_uY_AYAf4KTvfs3nmc,14900
113
110
  tpu_inference/layers/vllm/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
- tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py,sha256=6idEyy3e849fZ1UeNvc9eSHYX7e6qvohrJa_d_D9MBk,5285
111
+ tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py,sha256=uKaauZhaRDcMqd8_NyQoFs9BazMOFix3nIuutbLHHbU,5123
115
112
  tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=FM901QhyhJRC8CuMeICzCVVERvBHbhruRxYW0EQ570s,8820
116
113
  tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
117
114
  tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py,sha256=6sQvsxiWdi5Vte8V9vrQ2abaqGqWpq-mtzU7lGAo-ac,8759
118
115
  tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py,sha256=4y7lYgybpXszpCAtxGFhR8LDEbEoCCeo3DfUSOXxhaQ,5202
119
116
  tpu_inference/lora/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
120
117
  tpu_inference/lora/torch_lora_ops.py,sha256=pr3N7DVfkn3ANijUC6dBoiCtIJW4fdJpKdC3zWBUsxE,3121
121
- tpu_inference/lora/torch_punica_tpu.py,sha256=qTnXZGLoOgvukSxeunO_SfpPTlkq9GlMj9H7zVYg9LE,12680
118
+ tpu_inference/lora/torch_punica_tpu.py,sha256=b27DpmIS_N5bhlIcryiENYNmPxp_cu40CGxjPW64d44,12706
119
+ tpu_inference/mock/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
120
+ tpu_inference/mock/vllm_config_utils.py,sha256=FlQshLjoHdgs3C66tYHYbKFUjbk9DhUwY-7HibZk0fI,878
121
+ tpu_inference/mock/vllm_envs.py,sha256=cCubeOhH2WeYZQFJt6W0y_IiQo0fzIWR1LCCE8i6kI4,50990
122
+ tpu_inference/mock/vllm_logger.py,sha256=vUGnN5nKT--ZvU15YCzODUM_FGiXKhcrrjDGjeN00RQ,7297
123
+ tpu_inference/mock/vllm_logging_utils.py,sha256=TEUmKj3xHiLzHBnFqAujcxH0t2hBQ04sUaho2RyORnk,486
122
124
  tpu_inference/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
123
125
  tpu_inference/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
124
- tpu_inference/models/common/model_loader.py,sha256=b3aigca81gMVJt42oF2aoRohQHjBBe3oK3IPblZAaUM,19996
126
+ tpu_inference/models/common/model_loader.py,sha256=AwukmGaUq2wv3OnFHUU-nwdAnKLG_eGw7PYY5CNrNNI,18225
125
127
  tpu_inference/models/jax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
126
128
  tpu_inference/models/jax/deepseek_v3.py,sha256=SKOHVEC-_2NLxBnzBzbu5tu0d6FTlAEiI1EefGaO2QE,40047
127
129
  tpu_inference/models/jax/gpt_oss.py,sha256=Vw4LRB5Kp6hbA2hjZGFS8kiEqOCjf881XH2JNtu2S1I,20924
128
130
  tpu_inference/models/jax/jax_intermediate_tensor.py,sha256=Pxu1PCV5LN5X58aYVkPiohcXZIeKVim2oqvrS_cVgw4,2604
129
- tpu_inference/models/jax/llama3.py,sha256=ZiFtrpAzXTT9vAPES9UeuJInCWGbvDWs7g0_JLdCCa4,13479
131
+ tpu_inference/models/jax/llama3.py,sha256=YUG0S0Y6cy7PLcq0cpmDsGWbOZIhZzzyObRQdmUUxkg,13420
130
132
  tpu_inference/models/jax/llama4.py,sha256=wf2Sp2iYViaYD5rSfv3_ryO6gYuYM5XaOyvghaP4OCY,29631
131
- tpu_inference/models/jax/llama_eagle3.py,sha256=7-U99yvBkle-FSZ3NDDI-obWSQ2Fo2OTOi1H67H4jxY,12476
132
- tpu_inference/models/jax/llama_guard_4.py,sha256=LrnU2zBWM0s4q_5dwmR--OO0V7ttltsYhrHYlBgQVIw,15275
133
- tpu_inference/models/jax/qwen2.py,sha256=SuAp7tErk8OoIRko0Vt6QSOZP_9B9r5GTfqmVfImUIo,13410
134
- tpu_inference/models/jax/qwen2_5_vl.py,sha256=WUOmqNE6fHQ8PGU85Y8Bt6-CtCC1Uubbox_9FdpDMMo,49833
135
- tpu_inference/models/jax/qwen3.py,sha256=CIZQKjZDke_LPGsLNhRCJdDTzWueUneBPAQ1blS24IM,11050
133
+ tpu_inference/models/jax/llama_eagle3.py,sha256=STUkAK6XEA7JM3i_Lx36-t5BhkAGeW_xYiq3zYhHP1A,12297
134
+ tpu_inference/models/jax/phi3.py,sha256=Oz68PE2Z1t8wTed95_w0KMIXfnfV72ZwXugNOdWOV5w,13576
135
+ tpu_inference/models/jax/qwen2.py,sha256=RYb0hMKzPnFOAyhqbztoNlSrFIlRa74fYqSNecA2VOY,13354
136
+ tpu_inference/models/jax/qwen2_5_vl.py,sha256=J4-AjeS_igJdxYCjTwS0HShiEfwQUMwrHxjlWvMw0ok,43939
137
+ tpu_inference/models/jax/qwen3.py,sha256=SOL-Pvp56IrMxqXpPf5EFacBI6AJNlqf4Zrr1pkabGw,10994
136
138
  tpu_inference/models/jax/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
137
139
  tpu_inference/models/jax/utils/file_utils.py,sha256=NOuSC3YFnZpf3CZgYdghbbiNYJt42zgjlEYbOZIVct4,2840
138
140
  tpu_inference/models/jax/utils/multi_modal_utils.py,sha256=rrIrQWidkUnGilBHKNpdYh7_2BkvnAaqanXjC81GNcg,6156
139
- tpu_inference/models/jax/utils/weight_utils.py,sha256=qFU53jPHPvIcs_EOdIH80oNojpUp7GdSY2E6NZNsjvM,21376
141
+ tpu_inference/models/jax/utils/weight_utils.py,sha256=65-H8BTbyilIBMBfvWjkkW3mf4soYASbhrJFqbFKzL4,20129
140
142
  tpu_inference/models/jax/utils/quantization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
141
143
  tpu_inference/models/jax/utils/quantization/mxfp4_utils.py,sha256=boGnqJCRIOf5nedAxQ8_IUTV6Rfll10DXnRC40BeeE8,3682
142
- tpu_inference/models/jax/utils/quantization/quantization_utils.py,sha256=rzAFU3OtQvg8w8ow0V15rMljAsa4SBrwOye6OI8Bty4,26530
144
+ tpu_inference/models/jax/utils/quantization/quantization_utils.py,sha256=xgKoKB7AM3TYPxzVgEGLTK9ebQH2Kx8mNuO0heovkmk,26778
143
145
  tpu_inference/models/jax/utils/quantization/configs/fp8_all_modules_w_only.yaml,sha256=d_YHPtaRJ_7PBrPijSzJGnVeoJO62tKIGqrgFqpYT1k,137
144
146
  tpu_inference/models/jax/utils/quantization/configs/fp8_default.yaml,sha256=b7SyL75HuSTj3fN9_ZLCK_CDiccL5DGq_DddGmxj_qk,170
145
147
  tpu_inference/models/jax/utils/quantization/configs/int8_all_modules_w_only.yaml,sha256=0Qwij71zj9k6rmrUNd8Q5df9YYfkoJ1ZkgMAHxQy81k,128
146
148
  tpu_inference/models/jax/utils/quantization/configs/int8_default.yaml,sha256=lGec0UwwxmNPNgKPSsTsCMSXNJjhw507KMtM2NsSCMw,152
147
149
  tpu_inference/models/vllm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
148
- tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=3EcaD_1vZuyAZBfDtm5u_qfCahQU28qR4rAUraNAFqs,12305
150
+ tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=ERxj-cm-pmYpT9eiL-E3OxeaQDEDrH_Vs0iUS9nCU9s,11424
149
151
  tpu_inference/models/vllm/vllm_model_wrapper_context.py,sha256=yxlJHPmRQIAwlb1MmHK3xfXokgIkJ-evNU4PgyoJUdg,1187
150
152
  tpu_inference/platforms/__init__.py,sha256=lQCrKddS_GcGpCbeogvz9zOZD1mQw5bBsiw8On46qFQ,74
151
- tpu_inference/platforms/tpu_platform.py,sha256=q_eACjDkJkmnrUrKQzfK6hyqGEf2OjWn16-JHXwWquY,10723
153
+ tpu_inference/platforms/tpu_platform.py,sha256=bdo_zlRqrhccpaz6zOdH18cU8kq6tGKgR1xJJehsVrc,10131
152
154
  tpu_inference/runner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
153
155
  tpu_inference/runner/block_table.py,sha256=K3Ic8EgPM08d_C5nEN60mxoRydlaQWySAemf_8Q_qVw,4175
154
- tpu_inference/runner/compilation_manager.py,sha256=dU0Yk8f0LtRTBe2q0iB3xcMSRco_WPsj2wS6zZJ8WhY,40375
156
+ tpu_inference/runner/compilation_manager.py,sha256=pJFFLkFVmhXukBIxGRUo-hrOqx8jl8JIUuS36fZ2yvg,36177
155
157
  tpu_inference/runner/input_batch.py,sha256=bx221NX2IOWzrtopss-B-2ZKW4y-U6nQpG09PjpUziw,18273
156
- tpu_inference/runner/kv_cache.py,sha256=LKOZM5o8_62KDXhhYzQl2ibifgxN89ZxHvB1NT9u3MQ,4577
157
- tpu_inference/runner/kv_cache_manager.py,sha256=N0a896CE7Zrs_d4ZSSzRdqgjV1It57RBDSIpOzkRqro,22013
158
+ tpu_inference/runner/kv_cache.py,sha256=i54EbGQB-9bbOgk6KibTpJpTE2pfFuTfis7J1P_UB0M,4574
159
+ tpu_inference/runner/kv_cache_manager.py,sha256=CJxXtdWuewJqcTBMoR70_Uvwxjtc3cK2jxe1KpI9kQc,22152
158
160
  tpu_inference/runner/lora_utils.py,sha256=B4xMCgXGJ4VNdePvn89HH3tIZ-gYsQ7Vq_YCiYIATEY,3843
159
161
  tpu_inference/runner/multimodal_manager.py,sha256=azEPdHOwz8CN11MQmorGdtrCLbFaTCxdWyuEsZTzjYM,9778
160
- tpu_inference/runner/persistent_batch_manager.py,sha256=Otu67vOTf1_HKAMZgPDDHlRvvZ3YVJdz-QderH4qOII,13263
162
+ tpu_inference/runner/persistent_batch_manager.py,sha256=KERSfKy6XjMejnbtPGI3hzoYAHJLeCxmpZVYPqBCago,11156
161
163
  tpu_inference/runner/speculative_decoding_manager.py,sha256=I3FDWKh2dn6nV8LgTGfCTwMKYnxQsTPpBIrmaJngXHs,10215
162
- tpu_inference/runner/structured_decoding_manager.py,sha256=gZQKQUFxh6xYYH9eGTdbguqk8hc2WwTrIdMMuCcbymE,3573
163
- tpu_inference/runner/tpu_runner.py,sha256=NBDKfSGShHmYpudrtGfo1hnVSQTcLpZV_nPiXEo7JPQ,79439
164
- tpu_inference/runner/utils.py,sha256=lKqL5nxGTk7ufzJRNdp4udn2bPu3jIX52W7akXgSrHc,17133
164
+ tpu_inference/runner/structured_decoding_manager.py,sha256=Y0ERPhj4olFh6Y2TxP0R1_4UIJwy7nemYA-h63YIR2U,3622
165
+ tpu_inference/runner/tpu_runner.py,sha256=5vPFey3KFnh5lczyj4cIT3mVhR8RuX8kbcuHVOg8DAg,72318
166
+ tpu_inference/runner/utils.py,sha256=ZnWUoNo-7INeB0mdXti1jwUOdbmxyExznOs-crRTQLk,17126
165
167
  tpu_inference/spec_decode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
166
168
  tpu_inference/spec_decode/jax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
167
- tpu_inference/spec_decode/jax/eagle3.py,sha256=FxP0uWeQlHlgCpt1nY3FUd4lKlegKJljHyc05jJucaQ,19104
169
+ tpu_inference/spec_decode/jax/eagle3.py,sha256=A1dt-dmBttpy-5DGcL4noEDCB0OGP8Xo6MXqgJvWIo8,16593
168
170
  tpu_inference/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
169
- tpu_inference/worker/tpu_worker.py,sha256=LnZcSNxdhh0NkoWXxS5bZ0bsTMduSANehy2wELAaVsY,20672
170
- tpu_inference-0.0.1rc1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
171
- tpu_inference-0.0.1rc1.dist-info/METADATA,sha256=Ckyu7tcPAfxr698v8vDxUI70CyEVWLVDvUMFFcgqYYQ,5503
172
- tpu_inference-0.0.1rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
173
- tpu_inference-0.0.1rc1.dist-info/top_level.txt,sha256=gb1hRIQ3DOawUfVzvPL2E__2KPIl9I0vb5r0xcRBGYQ,20
174
- tpu_inference-0.0.1rc1.dist-info/RECORD,,
171
+ tpu_inference/worker/tpu_worker.py,sha256=KY7fH--NP7jiTduP5m0gDnmB2LbhIel0Ts37XmjYpPM,14207
172
+ tpu_inference-0.11.1.dev202511130813.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
173
+ tpu_inference-0.11.1.dev202511130813.dist-info/METADATA,sha256=LARdH4AAJfZrrU2Pj4EIN8Zl0QLjzEpzkRCqBbeUdT8,5465
174
+ tpu_inference-0.11.1.dev202511130813.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
175
+ tpu_inference-0.11.1.dev202511130813.dist-info/top_level.txt,sha256=gb1hRIQ3DOawUfVzvPL2E__2KPIl9I0vb5r0xcRBGYQ,20
176
+ tpu_inference-0.11.1.dev202511130813.dist-info/RECORD,,
tests/test_envs.py DELETED
@@ -1,203 +0,0 @@
1
- # SPDX-License-Identifier: Apache-2.0
2
- # SPDX-FileCopyrightText: Copyright contributors to the tpu-inference project
3
-
4
- import pytest
5
-
6
- import tpu_inference.envs as envs
7
- from tpu_inference.envs import enable_envs_cache, environment_variables
8
-
9
-
10
- def test_getattr_without_cache(monkeypatch: pytest.MonkeyPatch):
11
- assert envs.JAX_PLATFORMS == ""
12
- assert envs.PHASED_PROFILING_DIR == ""
13
- monkeypatch.setenv("JAX_PLATFORMS", "tpu")
14
- monkeypatch.setenv("PHASED_PROFILING_DIR", "/tmp/profiling")
15
- assert envs.JAX_PLATFORMS == "tpu"
16
- assert envs.PHASED_PROFILING_DIR == "/tmp/profiling"
17
-
18
- assert envs.TPU_NAME is None
19
- assert envs.TPU_ACCELERATOR_TYPE is None
20
- monkeypatch.setenv("TPU_NAME", "my-tpu")
21
- monkeypatch.setenv("TPU_ACCELERATOR_TYPE", "v5litepod-16")
22
- assert envs.TPU_NAME == "my-tpu"
23
- assert envs.TPU_ACCELERATOR_TYPE == "v5litepod-16"
24
-
25
- # __getattr__ is not decorated with functools.cache
26
- assert not hasattr(envs.__getattr__, "cache_info")
27
-
28
-
29
- def test_getattr_with_cache(monkeypatch: pytest.MonkeyPatch):
30
- monkeypatch.setenv("JAX_PLATFORMS", "tpu")
31
- monkeypatch.setenv("TPU_NAME", "my-tpu")
32
-
33
- # __getattr__ is not decorated with functools.cache
34
- assert not hasattr(envs.__getattr__, "cache_info")
35
-
36
- enable_envs_cache()
37
-
38
- # __getattr__ is decorated with functools.cache
39
- assert hasattr(envs.__getattr__, "cache_info")
40
- start_hits = envs.__getattr__.cache_info().hits
41
-
42
- # 2 more hits due to JAX_PLATFORMS and TPU_NAME accesses
43
- assert envs.JAX_PLATFORMS == "tpu"
44
- assert envs.TPU_NAME == "my-tpu"
45
- assert envs.__getattr__.cache_info().hits == start_hits + 2
46
-
47
- # All environment variables are cached
48
- for environment_variable in environment_variables:
49
- envs.__getattr__(environment_variable)
50
- assert envs.__getattr__.cache_info(
51
- ).hits == start_hits + 2 + len(environment_variables)
52
-
53
- # Reset envs.__getattr__ back to non-cached version to
54
- # avoid affecting other tests
55
- envs.__getattr__ = envs.__getattr__.__wrapped__
56
-
57
-
58
- def test_boolean_env_vars(monkeypatch: pytest.MonkeyPatch):
59
- # Ensure clean environment for boolean vars by setting to default "0"
60
- monkeypatch.setenv("SKIP_JAX_PRECOMPILE", "0")
61
- monkeypatch.setenv("VLLM_XLA_CHECK_RECOMPILATION", "0")
62
- monkeypatch.setenv("NEW_MODEL_DESIGN", "0")
63
- monkeypatch.setenv("USE_MOE_EP_KERNEL", "0")
64
-
65
- # Test SKIP_JAX_PRECOMPILE (default False)
66
- assert envs.SKIP_JAX_PRECOMPILE is False
67
- monkeypatch.setenv("SKIP_JAX_PRECOMPILE", "1")
68
- assert envs.SKIP_JAX_PRECOMPILE is True
69
- monkeypatch.setenv("SKIP_JAX_PRECOMPILE", "0")
70
- assert envs.SKIP_JAX_PRECOMPILE is False
71
-
72
- # Test VLLM_XLA_CHECK_RECOMPILATION (default False)
73
- assert envs.VLLM_XLA_CHECK_RECOMPILATION is False
74
- monkeypatch.setenv("VLLM_XLA_CHECK_RECOMPILATION", "1")
75
- assert envs.VLLM_XLA_CHECK_RECOMPILATION is True
76
- monkeypatch.setenv("VLLM_XLA_CHECK_RECOMPILATION", "0")
77
- assert envs.VLLM_XLA_CHECK_RECOMPILATION is False
78
-
79
- # Test NEW_MODEL_DESIGN (default False)
80
- assert envs.NEW_MODEL_DESIGN is False
81
- monkeypatch.setenv("NEW_MODEL_DESIGN", "1")
82
- assert envs.NEW_MODEL_DESIGN is True
83
-
84
- # Test USE_MOE_EP_KERNEL (default False)
85
- assert envs.USE_MOE_EP_KERNEL is False
86
- monkeypatch.setenv("USE_MOE_EP_KERNEL", "1")
87
- assert envs.USE_MOE_EP_KERNEL is True
88
-
89
-
90
- def test_integer_env_vars(monkeypatch: pytest.MonkeyPatch):
91
- # Ensure clean environment for integer vars by setting to defaults
92
- monkeypatch.setenv("PYTHON_TRACER_LEVEL", "1")
93
- monkeypatch.setenv("NUM_SLICES", "1")
94
-
95
- assert envs.PYTHON_TRACER_LEVEL == 1
96
- monkeypatch.setenv("PYTHON_TRACER_LEVEL", "3")
97
- assert envs.PYTHON_TRACER_LEVEL == 3
98
- monkeypatch.setenv("PYTHON_TRACER_LEVEL", "0")
99
- assert envs.PYTHON_TRACER_LEVEL == 0
100
-
101
- # Test NUM_SLICES (default 1)
102
- assert envs.NUM_SLICES == 1
103
- monkeypatch.setenv("NUM_SLICES", "2")
104
- assert envs.NUM_SLICES == 2
105
- monkeypatch.setenv("NUM_SLICES", "4")
106
- assert envs.NUM_SLICES == 4
107
-
108
-
109
- def test_model_impl_type_choices(monkeypatch: pytest.MonkeyPatch):
110
- # Test case sensitive choices
111
- monkeypatch.setenv("MODEL_IMPL_TYPE", "flax_nnx")
112
- assert envs.MODEL_IMPL_TYPE == "flax_nnx"
113
-
114
- monkeypatch.setenv("MODEL_IMPL_TYPE", "vllm")
115
- assert envs.MODEL_IMPL_TYPE == "vllm"
116
-
117
-
118
- def test_string_env_vars_defaults(monkeypatch: pytest.MonkeyPatch):
119
- monkeypatch.delenv("JAX_PLATFORMS", raising=False)
120
- monkeypatch.delenv("PREFILL_SLICES", raising=False)
121
- monkeypatch.delenv("DECODE_SLICES", raising=False)
122
-
123
- assert envs.JAX_PLATFORMS == ""
124
- assert envs.PREFILL_SLICES == ""
125
- assert envs.DECODE_SLICES == ""
126
- assert envs.PHASED_PROFILING_DIR == ""
127
-
128
-
129
- def test_none_default_env_vars(monkeypatch: pytest.MonkeyPatch):
130
- monkeypatch.delenv("TPU_ACCELERATOR_TYPE", raising=False)
131
- monkeypatch.delenv("TPU_NAME", raising=False)
132
- monkeypatch.delenv("TPU_WORKER_ID", raising=False)
133
-
134
- assert envs.TPU_ACCELERATOR_TYPE is None
135
- assert envs.TPU_NAME is None
136
- assert envs.TPU_WORKER_ID is None
137
-
138
-
139
- def test_ray_env_vars(monkeypatch: pytest.MonkeyPatch):
140
- assert envs.RAY_USAGE_STATS_ENABLED == "0"
141
- monkeypatch.setenv("RAY_USAGE_STATS_ENABLED", "1")
142
- assert envs.RAY_USAGE_STATS_ENABLED == "1"
143
-
144
- assert envs.VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE == "shm"
145
-
146
-
147
- def test_invalid_attribute_raises_error():
148
- with pytest.raises(AttributeError,
149
- match="has no attribute 'NONEXISTENT_VAR'"):
150
- _ = envs.NONEXISTENT_VAR
151
-
152
-
153
- def test_dir_returns_all_env_vars():
154
- env_vars = envs.__dir__()
155
- assert isinstance(env_vars, list)
156
- assert len(env_vars) == len(environment_variables)
157
- assert "JAX_PLATFORMS" in env_vars
158
- assert "TPU_NAME" in env_vars
159
- assert "SKIP_JAX_PRECOMPILE" in env_vars
160
- assert "VLLM_XLA_CHECK_RECOMPILATION" in env_vars
161
- assert "MODEL_IMPL_TYPE" in env_vars
162
-
163
-
164
- def test_tpu_multihost_env_vars(monkeypatch: pytest.MonkeyPatch):
165
- monkeypatch.setenv("TPU_WORKER_ID", "0")
166
- assert envs.TPU_WORKER_ID == "0"
167
-
168
- monkeypatch.setenv("TPU_MULTIHOST_BACKEND", "ray")
169
- assert envs.TPU_MULTIHOST_BACKEND == "ray"
170
-
171
-
172
- def test_disaggregated_serving_env_vars(monkeypatch: pytest.MonkeyPatch):
173
- monkeypatch.setenv("PREFILL_SLICES", "0,1,2,3")
174
- assert envs.PREFILL_SLICES == "0,1,2,3"
175
-
176
- monkeypatch.setenv("DECODE_SLICES", "4,5,6,7")
177
- assert envs.DECODE_SLICES == "4,5,6,7"
178
-
179
-
180
- def test_model_impl_type_default(monkeypatch: pytest.MonkeyPatch):
181
- monkeypatch.delenv("MODEL_IMPL_TYPE", raising=False)
182
- assert envs.MODEL_IMPL_TYPE == "flax_nnx"
183
-
184
-
185
- def test_cache_preserves_values_across_env_changes(
186
- monkeypatch: pytest.MonkeyPatch):
187
- monkeypatch.setenv("JAX_PLATFORMS", "tpu")
188
-
189
- enable_envs_cache()
190
-
191
- assert envs.JAX_PLATFORMS == "tpu"
192
-
193
- # Change environment variable
194
- monkeypatch.setenv("JAX_PLATFORMS", "cpu")
195
-
196
- # Cached value should still be "tpu"
197
- assert envs.JAX_PLATFORMS == "tpu"
198
-
199
- # Reset envs.__getattr__ back to non-cached version
200
- envs.__getattr__ = envs.__getattr__.__wrapped__
201
-
202
- # Now it should reflect the new value
203
- assert envs.JAX_PLATFORMS == "cpu"
@@ -1,8 +0,0 @@
1
- UNQUANTIZED = "unquantized"
2
- MXFP4 = "mxfp4"
3
- AWQ = "awq"
4
- COMPRESSED_TENSORS = "compressed-tensors"
5
-
6
-
7
- def get_tpu_quant_method(quant_method: str) -> str:
8
- return "tpu-" + quant_method