tpu-inference 0.11.1.dev202511150811__py3-none-any.whl → 0.11.1.dev202512030818__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tpu-inference might be problematic. Click here for more details.
- tests/kernels/fused_moe_v1_test.py +303 -34
- tests/lora/test_layers.py +0 -6
- tests/lora/utils.py +0 -8
- tests/test_envs.py +32 -11
- tests/test_utils.py +1 -2
- tpu_inference/__init__.py +22 -3
- tpu_inference/core/disagg_utils.py +6 -8
- tpu_inference/distributed/tpu_connector.py +3 -4
- tpu_inference/distributed/utils.py +3 -2
- tpu_inference/envs.py +61 -8
- tpu_inference/executors/ray_distributed_executor.py +31 -11
- tpu_inference/kernels/fused_moe/v1/kernel.py +641 -110
- tpu_inference/kernels/ragged_paged_attention/v3/kernel.py +77 -54
- tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py +213 -126
- tpu_inference/layers/common/attention_interface.py +7 -1
- tpu_inference/layers/common/sharding.py +5 -5
- tpu_inference/layers/vllm/fused_moe.py +74 -25
- tpu_inference/layers/vllm/quantization/common.py +6 -1
- tpu_inference/layers/vllm/quantization/mxfp4.py +137 -62
- tpu_inference/layers/vllm/quantization/unquantized.py +107 -113
- tpu_inference/layers/vllm/sharding.py +2 -2
- tpu_inference/lora/torch_punica_tpu.py +1 -2
- tpu_inference/models/common/model_loader.py +45 -11
- tpu_inference/models/jax/llama3.py +2 -1
- tpu_inference/models/jax/llama_eagle3.py +8 -5
- tpu_inference/models/jax/llama_guard_4.py +361 -0
- tpu_inference/models/jax/qwen2.py +2 -1
- tpu_inference/models/jax/qwen2_5_vl.py +163 -48
- tpu_inference/models/jax/qwen3.py +2 -1
- tpu_inference/models/jax/utils/quantization/quantization_utils.py +3 -6
- tpu_inference/models/jax/utils/weight_utils.py +198 -143
- tpu_inference/models/vllm/vllm_model_wrapper.py +14 -7
- tpu_inference/platforms/tpu_platform.py +28 -22
- tpu_inference/runner/compilation_manager.py +144 -59
- tpu_inference/runner/kv_cache_manager.py +17 -18
- tpu_inference/runner/persistent_batch_manager.py +40 -2
- tpu_inference/runner/structured_decoding_manager.py +2 -3
- tpu_inference/runner/tpu_runner.py +271 -147
- tpu_inference/runner/utils.py +2 -2
- tpu_inference/spec_decode/jax/eagle3.py +71 -21
- tpu_inference/tpu_info.py +4 -3
- tpu_inference/utils.py +36 -13
- tpu_inference/worker/tpu_worker.py +162 -25
- {tpu_inference-0.11.1.dev202511150811.dist-info → tpu_inference-0.11.1.dev202512030818.dist-info}/METADATA +3 -2
- {tpu_inference-0.11.1.dev202511150811.dist-info → tpu_inference-0.11.1.dev202512030818.dist-info}/RECORD +48 -53
- tpu_inference/mock/__init__.py +0 -0
- tpu_inference/mock/vllm_config_utils.py +0 -28
- tpu_inference/mock/vllm_envs.py +0 -1219
- tpu_inference/mock/vllm_logger.py +0 -212
- tpu_inference/mock/vllm_logging_utils.py +0 -15
- tpu_inference/models/jax/phi3.py +0 -376
- {tpu_inference-0.11.1.dev202511150811.dist-info → tpu_inference-0.11.1.dev202512030818.dist-info}/WHEEL +0 -0
- {tpu_inference-0.11.1.dev202511150811.dist-info → tpu_inference-0.11.1.dev202512030818.dist-info}/licenses/LICENSE +0 -0
- {tpu_inference-0.11.1.dev202511150811.dist-info → tpu_inference-0.11.1.dev202512030818.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
tests/test_base.py,sha256=Ct5WFRMHL7IHEIxk8FrzAvO8m0xFuDpzDBKkAKKAL2Q,7341
|
|
3
|
-
tests/test_envs.py,sha256=
|
|
3
|
+
tests/test_envs.py,sha256=h502VxL2gvhECm8u5uDh5JTGvhFf_DfQO88SpqOFMzE,7135
|
|
4
4
|
tests/test_quantization.py,sha256=IT5ASyS1uuWcxc22kRtBcA-V4j3Z3hb7pMztm3GOlBs,34445
|
|
5
5
|
tests/test_tpu_info.py,sha256=ZrwlMsp8ffITkS_b8Q1t_QG-a-WVAd4NUcjHhGibcsI,4670
|
|
6
|
-
tests/test_utils.py,sha256=
|
|
6
|
+
tests/test_utils.py,sha256=GIXLdd-x4gnqSLrySXGk22phqPc8MegFd7ph1Jj8OcU,8182
|
|
7
7
|
tests/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
tests/core/test_core_tpu.py,sha256=r496rk1eOsK_F4nvm9zprl_T-RcO6eCUb7LuVReOZno,21413
|
|
9
9
|
tests/core/test_disagg_executor.py,sha256=QdE2YZs08EyDDCmSjhiXkXqQ9BJTgO6csr_E1xkkfSg,2256
|
|
@@ -11,7 +11,7 @@ tests/core/test_disagg_utils.py,sha256=alktTGppaGdg-_un0Amz8Y0IDQz-xNJN0dXG-YApE
|
|
|
11
11
|
tests/core/test_dp_scheduler.py,sha256=IwCR1Vs96V4CQdWA051rNaYxxr2V_byA1yx9HWyRoMg,37339
|
|
12
12
|
tests/core/test_init.py,sha256=NEFI5A9eKGu4rmeJ2iqd0EmhlA3bzbVkXmMi1PV1b9U,1687
|
|
13
13
|
tests/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
-
tests/kernels/fused_moe_v1_test.py,sha256=
|
|
14
|
+
tests/kernels/fused_moe_v1_test.py,sha256=sQ6gvpti94fpPYrSZn7frPPNjqbVmRibFtenVrGGCA4,10403
|
|
15
15
|
tests/kernels/mla_v1_test.py,sha256=oZc4TCgquiG0KOeWfv46yJbUIpro_CgCMFc7vzyB7t8,11646
|
|
16
16
|
tests/kernels/quantized_matmul_kernel_test.py,sha256=od5-zXFjcsc_gWGRDrREL8E_ftymNniQVTzgtkBo_Gc,5679
|
|
17
17
|
tests/kernels/ragged_kv_cache_update_v2_test.py,sha256=6-HjP5CoUG-kcuP8MS-JJVMiBnPRo_zadS3VInnO0D4,10821
|
|
@@ -21,27 +21,27 @@ tests/kernels/ragged_paged_attention_kernel_v3_test.py,sha256=Hrd8iUkS1pS3rxeTyY
|
|
|
21
21
|
tests/lora/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
22
|
tests/lora/conftest.py,sha256=EXjwE1CjmUUlMEXpyE3UwxvgrKUllE73I8BNKfP1FTc,984
|
|
23
23
|
tests/lora/test_bgmv.py,sha256=gQxWsJdNX2nkrE2xyrG0exwf3E2eHm2k2nkEXoANuQc,1359
|
|
24
|
-
tests/lora/test_layers.py,sha256=
|
|
24
|
+
tests/lora/test_layers.py,sha256=6B4HhMAItQmt0hPAQgyXgwSYs7b3bIbUf6LaPsqXLzY,25923
|
|
25
25
|
tests/lora/test_lora.py,sha256=wJiF1P1BDnPN8TLX2tlFtdZ_QCkV-S9nPl6_uR6DqFc,4439
|
|
26
|
-
tests/lora/utils.py,sha256=
|
|
27
|
-
tpu_inference/__init__.py,sha256=
|
|
26
|
+
tests/lora/utils.py,sha256=rY0tDZEZe58ye4-ykwrTnsiWuLcaEG57N_Rua90bDXI,2726
|
|
27
|
+
tpu_inference/__init__.py,sha256=p4MaepRdN7723FUNE-3pOMxZWjFn4_TVFgjrNyty4JE,2304
|
|
28
28
|
tpu_inference/env_override.py,sha256=pmL7lfs_rGCP92ya3wuWuudsCYeOMZ6tFZY82A4KkQc,365
|
|
29
|
-
tpu_inference/envs.py,sha256=
|
|
29
|
+
tpu_inference/envs.py,sha256=ugze6VdQ_hG1IxUCbcgXZq7a22fZ-Lora3V_fkFOefw,5714
|
|
30
30
|
tpu_inference/logger.py,sha256=HQCz7NefmbturuhOC7-3Ixbtcdgoz4g9FHh2RB6o8cc,334
|
|
31
|
-
tpu_inference/tpu_info.py,sha256=
|
|
32
|
-
tpu_inference/utils.py,sha256=
|
|
31
|
+
tpu_inference/tpu_info.py,sha256=3iilHRQSFjwMJwhKcuuawTm7mhwkgHbj4zi6CiAySrs,2265
|
|
32
|
+
tpu_inference/utils.py,sha256=mHbjI8fxInPxagLsSUg-R3DzSz-X7WYNdoorPYoE3hg,10855
|
|
33
33
|
tpu_inference/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
34
|
tpu_inference/core/core_tpu.py,sha256=WDD3koE_j1QhWS2BbMA2aQOZayPZm4tYPvzL4YCX2jY,33294
|
|
35
35
|
tpu_inference/core/disagg_executor.py,sha256=HZpgYMVxRxm0RQxO4l8IDYBWJ6Z3Tac6xavc5otcirc,4657
|
|
36
|
-
tpu_inference/core/disagg_utils.py,sha256=
|
|
36
|
+
tpu_inference/core/disagg_utils.py,sha256=lv8MAVoAjtcmTaenUXVokg2q3d0tzsma86UiQlQ3omY,1492
|
|
37
37
|
tpu_inference/core/sched/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
38
|
tpu_inference/core/sched/dp_scheduler.py,sha256=mKs8Ms46szdlBfo8hjdqis2ZKAZbcKnHAGfEr0X5R8g,22527
|
|
39
39
|
tpu_inference/distributed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
40
|
tpu_inference/distributed/jax_parallel_state.py,sha256=5_xCwcL03lFPUoSO_OP7hIVKpUFroW1m-jVO7R6FbUc,2223
|
|
41
|
-
tpu_inference/distributed/tpu_connector.py,sha256=
|
|
42
|
-
tpu_inference/distributed/utils.py,sha256=
|
|
41
|
+
tpu_inference/distributed/tpu_connector.py,sha256=kLaTwy6BrAThJeFkd1soJ47bBo5iGp4GjUJs7xFx4Tg,29696
|
|
42
|
+
tpu_inference/distributed/utils.py,sha256=1KIREn28Zg10O-MSUkVQMRzS09WoGc_VLGOX4QTFJac,1504
|
|
43
43
|
tpu_inference/executors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
|
-
tpu_inference/executors/ray_distributed_executor.py,sha256=
|
|
44
|
+
tpu_inference/executors/ray_distributed_executor.py,sha256=9CnzWb8aurH1B0tJfMHB73F-RQBGqSf5DnymetBvZ5o,16225
|
|
45
45
|
tpu_inference/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
46
|
tpu_inference/experimental/llama3_jax_stashed.py,sha256=YK1oSIfto9ALo-HB45XfSrbq9XgVbE4m2C-9zRwmSzI,10913
|
|
47
47
|
tpu_inference/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -53,7 +53,7 @@ tpu_inference/kernels/flash_attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
|
|
|
53
53
|
tpu_inference/kernels/flash_attention/kernel.py,sha256=n8gmAFVfchMXlyaSEj8xXJm6AadFt26edQihPRdithY,25897
|
|
54
54
|
tpu_inference/kernels/fused_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
55
|
tpu_inference/kernels/fused_moe/v1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
|
-
tpu_inference/kernels/fused_moe/v1/kernel.py,sha256=
|
|
56
|
+
tpu_inference/kernels/fused_moe/v1/kernel.py,sha256=xVXfclgbw_3U7c5W1azDFkFDK5FolBzDN9IL0rIzLQs,62813
|
|
57
57
|
tpu_inference/kernels/mla/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
58
58
|
tpu_inference/kernels/mla/v1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
59
59
|
tpu_inference/kernels/mla/v1/kernel.py,sha256=dw1nhpL47uQxMFOIN2kENC6aITbalT81YZLAyr1usLU,51571
|
|
@@ -67,18 +67,18 @@ tpu_inference/kernels/ragged_paged_attention/v2/kernel.py,sha256=OiQGAHhyggbp1Pe
|
|
|
67
67
|
tpu_inference/kernels/ragged_paged_attention/v2/ragged_kv_cache_update.py,sha256=vGp2ZWODTbjyG9z2z0Qf_BX-wYHd5bUybnc_DtOz0nI,10995
|
|
68
68
|
tpu_inference/kernels/ragged_paged_attention/v2/tuned_block_sizes.py,sha256=mw80bXBGenroGdrITV0F_EaI2s-Z9KWwqU9WodvJg14,97919
|
|
69
69
|
tpu_inference/kernels/ragged_paged_attention/v3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
70
|
-
tpu_inference/kernels/ragged_paged_attention/v3/kernel.py,sha256=
|
|
71
|
-
tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py,sha256=
|
|
70
|
+
tpu_inference/kernels/ragged_paged_attention/v3/kernel.py,sha256=O179Fft5KpuN5LIFx3SghWXJJUqh3Og-xqfO4Z8QXYU,57032
|
|
71
|
+
tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py,sha256=ArwrqIQiKIop_jaDKAMw656YHQ3IFZ0sRu9Cgycrtko,59858
|
|
72
72
|
tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes.py,sha256=k3LwduhZO85cJ-pSgnGN0c2Nn8eNeQq4eA94KUXJzMw,142198
|
|
73
73
|
tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes_hd64.py,sha256=P3_ivi8iUz5QMU_3pgpl4Bkbmn0q0NpDtVJX39haRQA,11208
|
|
74
74
|
tpu_inference/kernels/ragged_paged_attention/v3/util.py,sha256=1N_ozjKboDYLteFJndWoLXNudj2z53rGXMkELa5Z9tY,1102
|
|
75
75
|
tpu_inference/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
76
76
|
tpu_inference/layers/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
|
-
tpu_inference/layers/common/attention_interface.py,sha256=
|
|
77
|
+
tpu_inference/layers/common/attention_interface.py,sha256=SQZ-1I32Jqg7GGI-z4BVibXbaitJHyTs26X3B5nBRVo,13369
|
|
78
78
|
tpu_inference/layers/common/attention_metadata.py,sha256=St8ZatbY1D7xQACKJH459jMgp3oTP3AQ36mi9FZdrPU,850
|
|
79
79
|
tpu_inference/layers/common/binary_search.py,sha256=ZQi-z1wG6WTcfVQXeTGOZokX4K1DSf9kCzqfrhEU8lk,12320
|
|
80
80
|
tpu_inference/layers/common/quant_methods.py,sha256=mQSxZ44-QQtm22C_8ViejnP1cP2Dv6yc2YaP6oMKJeQ,185
|
|
81
|
-
tpu_inference/layers/common/sharding.py,sha256=
|
|
81
|
+
tpu_inference/layers/common/sharding.py,sha256=sjbwkDr2fP26Ob8f5cSDeDifr3eWFZMDHU4MKr7pIgQ,25217
|
|
82
82
|
tpu_inference/layers/jax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
83
83
|
tpu_inference/layers/jax/base.py,sha256=Vhts6ZMwNCZ8LbnEXeB0rl3nHdS5hDJWX7HEa7Fl7yE,5775
|
|
84
84
|
tpu_inference/layers/jax/constants.py,sha256=NcYg0zAf3ClfP7YMYdYu_F1GngOzZaIxIAHBZDunKw4,2755
|
|
@@ -102,14 +102,14 @@ tpu_inference/layers/jax/sample/sampling.py,sha256=C30KgmdOVSaagvHhbfLgVJtVQmJo8
|
|
|
102
102
|
tpu_inference/layers/jax/sample/sampling_metadata.py,sha256=Gd835LNWfGM0NRQBVBqEv0nPwt5q9F4AdFym0CUS1fw,2561
|
|
103
103
|
tpu_inference/layers/vllm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
104
104
|
tpu_inference/layers/vllm/attention.py,sha256=wbJpcgqEAuIirv5PIULbiP-ggMKjmTanbB7Dg0BVYv4,7366
|
|
105
|
-
tpu_inference/layers/vllm/fused_moe.py,sha256=
|
|
105
|
+
tpu_inference/layers/vllm/fused_moe.py,sha256=5rzPzMsKRmsD-hh2Rue0ahaeyhh6cHB7FUIrKfn4s0U,21089
|
|
106
106
|
tpu_inference/layers/vllm/linear_common.py,sha256=_YlJtbdaYcck_j-gFLos_k0ycktVWxT8Qo57tR2YqJ8,7749
|
|
107
|
-
tpu_inference/layers/vllm/sharding.py,sha256=
|
|
107
|
+
tpu_inference/layers/vllm/sharding.py,sha256=as7CF8UKTF3ToymwRY5Pi8uzwJk0P1sHPkWB5xEx3mA,9169
|
|
108
108
|
tpu_inference/layers/vllm/quantization/__init__.py,sha256=SEppGayBzzQ5tsXLSy99aqilkAawQwYxnv2alCg6-ZU,1777
|
|
109
109
|
tpu_inference/layers/vllm/quantization/awq.py,sha256=-8ZmjGvSKJB6_JuwSctNWt8xHWq4VSvK_AK9iahlgCo,8495
|
|
110
|
-
tpu_inference/layers/vllm/quantization/common.py,sha256=
|
|
111
|
-
tpu_inference/layers/vllm/quantization/mxfp4.py,sha256=
|
|
112
|
-
tpu_inference/layers/vllm/quantization/unquantized.py,sha256=
|
|
110
|
+
tpu_inference/layers/vllm/quantization/common.py,sha256=8XD64pPa077c9HThFhLFVHlDL9YBafnYwp6rp6gR44E,4432
|
|
111
|
+
tpu_inference/layers/vllm/quantization/mxfp4.py,sha256=o661uiSvLvWGr8hQMl7TqYXJyALPREtNWlKHAM9AUrw,14541
|
|
112
|
+
tpu_inference/layers/vllm/quantization/unquantized.py,sha256=nSRBzVurTiQQkF9FuSTshfRwfxfzs54E2_4eK7Eyhj0,15345
|
|
113
113
|
tpu_inference/layers/vllm/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
114
114
|
tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py,sha256=6idEyy3e849fZ1UeNvc9eSHYX7e6qvohrJa_d_D9MBk,5285
|
|
115
115
|
tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=FM901QhyhJRC8CuMeICzCVVERvBHbhruRxYW0EQ570s,8820
|
|
@@ -118,62 +118,57 @@ tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/compressed_ten
|
|
|
118
118
|
tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py,sha256=4y7lYgybpXszpCAtxGFhR8LDEbEoCCeo3DfUSOXxhaQ,5202
|
|
119
119
|
tpu_inference/lora/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
120
120
|
tpu_inference/lora/torch_lora_ops.py,sha256=pr3N7DVfkn3ANijUC6dBoiCtIJW4fdJpKdC3zWBUsxE,3121
|
|
121
|
-
tpu_inference/lora/torch_punica_tpu.py,sha256=
|
|
122
|
-
tpu_inference/mock/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
123
|
-
tpu_inference/mock/vllm_config_utils.py,sha256=FlQshLjoHdgs3C66tYHYbKFUjbk9DhUwY-7HibZk0fI,878
|
|
124
|
-
tpu_inference/mock/vllm_envs.py,sha256=cCubeOhH2WeYZQFJt6W0y_IiQo0fzIWR1LCCE8i6kI4,50990
|
|
125
|
-
tpu_inference/mock/vllm_logger.py,sha256=vUGnN5nKT--ZvU15YCzODUM_FGiXKhcrrjDGjeN00RQ,7297
|
|
126
|
-
tpu_inference/mock/vllm_logging_utils.py,sha256=TEUmKj3xHiLzHBnFqAujcxH0t2hBQ04sUaho2RyORnk,486
|
|
121
|
+
tpu_inference/lora/torch_punica_tpu.py,sha256=qTnXZGLoOgvukSxeunO_SfpPTlkq9GlMj9H7zVYg9LE,12680
|
|
127
122
|
tpu_inference/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
128
123
|
tpu_inference/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
129
|
-
tpu_inference/models/common/model_loader.py,sha256=
|
|
124
|
+
tpu_inference/models/common/model_loader.py,sha256=b3aigca81gMVJt42oF2aoRohQHjBBe3oK3IPblZAaUM,19996
|
|
130
125
|
tpu_inference/models/jax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
131
126
|
tpu_inference/models/jax/deepseek_v3.py,sha256=SKOHVEC-_2NLxBnzBzbu5tu0d6FTlAEiI1EefGaO2QE,40047
|
|
132
127
|
tpu_inference/models/jax/gpt_oss.py,sha256=Vw4LRB5Kp6hbA2hjZGFS8kiEqOCjf881XH2JNtu2S1I,20924
|
|
133
128
|
tpu_inference/models/jax/jax_intermediate_tensor.py,sha256=Pxu1PCV5LN5X58aYVkPiohcXZIeKVim2oqvrS_cVgw4,2604
|
|
134
|
-
tpu_inference/models/jax/llama3.py,sha256=
|
|
129
|
+
tpu_inference/models/jax/llama3.py,sha256=ZiFtrpAzXTT9vAPES9UeuJInCWGbvDWs7g0_JLdCCa4,13479
|
|
135
130
|
tpu_inference/models/jax/llama4.py,sha256=wf2Sp2iYViaYD5rSfv3_ryO6gYuYM5XaOyvghaP4OCY,29631
|
|
136
|
-
tpu_inference/models/jax/llama_eagle3.py,sha256=
|
|
137
|
-
tpu_inference/models/jax/
|
|
138
|
-
tpu_inference/models/jax/qwen2.py,sha256=
|
|
139
|
-
tpu_inference/models/jax/qwen2_5_vl.py,sha256=
|
|
140
|
-
tpu_inference/models/jax/qwen3.py,sha256=
|
|
131
|
+
tpu_inference/models/jax/llama_eagle3.py,sha256=7-U99yvBkle-FSZ3NDDI-obWSQ2Fo2OTOi1H67H4jxY,12476
|
|
132
|
+
tpu_inference/models/jax/llama_guard_4.py,sha256=LrnU2zBWM0s4q_5dwmR--OO0V7ttltsYhrHYlBgQVIw,15275
|
|
133
|
+
tpu_inference/models/jax/qwen2.py,sha256=SuAp7tErk8OoIRko0Vt6QSOZP_9B9r5GTfqmVfImUIo,13410
|
|
134
|
+
tpu_inference/models/jax/qwen2_5_vl.py,sha256=WUOmqNE6fHQ8PGU85Y8Bt6-CtCC1Uubbox_9FdpDMMo,49833
|
|
135
|
+
tpu_inference/models/jax/qwen3.py,sha256=CIZQKjZDke_LPGsLNhRCJdDTzWueUneBPAQ1blS24IM,11050
|
|
141
136
|
tpu_inference/models/jax/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
142
137
|
tpu_inference/models/jax/utils/file_utils.py,sha256=NOuSC3YFnZpf3CZgYdghbbiNYJt42zgjlEYbOZIVct4,2840
|
|
143
138
|
tpu_inference/models/jax/utils/multi_modal_utils.py,sha256=rrIrQWidkUnGilBHKNpdYh7_2BkvnAaqanXjC81GNcg,6156
|
|
144
|
-
tpu_inference/models/jax/utils/weight_utils.py,sha256=
|
|
139
|
+
tpu_inference/models/jax/utils/weight_utils.py,sha256=qFU53jPHPvIcs_EOdIH80oNojpUp7GdSY2E6NZNsjvM,21376
|
|
145
140
|
tpu_inference/models/jax/utils/quantization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
146
141
|
tpu_inference/models/jax/utils/quantization/mxfp4_utils.py,sha256=boGnqJCRIOf5nedAxQ8_IUTV6Rfll10DXnRC40BeeE8,3682
|
|
147
|
-
tpu_inference/models/jax/utils/quantization/quantization_utils.py,sha256=
|
|
142
|
+
tpu_inference/models/jax/utils/quantization/quantization_utils.py,sha256=rzAFU3OtQvg8w8ow0V15rMljAsa4SBrwOye6OI8Bty4,26530
|
|
148
143
|
tpu_inference/models/jax/utils/quantization/configs/fp8_all_modules_w_only.yaml,sha256=d_YHPtaRJ_7PBrPijSzJGnVeoJO62tKIGqrgFqpYT1k,137
|
|
149
144
|
tpu_inference/models/jax/utils/quantization/configs/fp8_default.yaml,sha256=b7SyL75HuSTj3fN9_ZLCK_CDiccL5DGq_DddGmxj_qk,170
|
|
150
145
|
tpu_inference/models/jax/utils/quantization/configs/int8_all_modules_w_only.yaml,sha256=0Qwij71zj9k6rmrUNd8Q5df9YYfkoJ1ZkgMAHxQy81k,128
|
|
151
146
|
tpu_inference/models/jax/utils/quantization/configs/int8_default.yaml,sha256=lGec0UwwxmNPNgKPSsTsCMSXNJjhw507KMtM2NsSCMw,152
|
|
152
147
|
tpu_inference/models/vllm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
153
|
-
tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=
|
|
148
|
+
tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=3EcaD_1vZuyAZBfDtm5u_qfCahQU28qR4rAUraNAFqs,12305
|
|
154
149
|
tpu_inference/models/vllm/vllm_model_wrapper_context.py,sha256=yxlJHPmRQIAwlb1MmHK3xfXokgIkJ-evNU4PgyoJUdg,1187
|
|
155
150
|
tpu_inference/platforms/__init__.py,sha256=lQCrKddS_GcGpCbeogvz9zOZD1mQw5bBsiw8On46qFQ,74
|
|
156
|
-
tpu_inference/platforms/tpu_platform.py,sha256=
|
|
151
|
+
tpu_inference/platforms/tpu_platform.py,sha256=F4jjPEFHFUTxdfWZYTBuUVJt6SYTFeWEKmrl74sX-Zk,10663
|
|
157
152
|
tpu_inference/runner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
158
153
|
tpu_inference/runner/block_table.py,sha256=K3Ic8EgPM08d_C5nEN60mxoRydlaQWySAemf_8Q_qVw,4175
|
|
159
|
-
tpu_inference/runner/compilation_manager.py,sha256=
|
|
154
|
+
tpu_inference/runner/compilation_manager.py,sha256=dU0Yk8f0LtRTBe2q0iB3xcMSRco_WPsj2wS6zZJ8WhY,40375
|
|
160
155
|
tpu_inference/runner/input_batch.py,sha256=bx221NX2IOWzrtopss-B-2ZKW4y-U6nQpG09PjpUziw,18273
|
|
161
156
|
tpu_inference/runner/kv_cache.py,sha256=F4dzW2d53xuxkFUn0oKzwE6VklGUeVm-QM19NVfIQDU,4577
|
|
162
|
-
tpu_inference/runner/kv_cache_manager.py,sha256=
|
|
157
|
+
tpu_inference/runner/kv_cache_manager.py,sha256=N0a896CE7Zrs_d4ZSSzRdqgjV1It57RBDSIpOzkRqro,22013
|
|
163
158
|
tpu_inference/runner/lora_utils.py,sha256=B4xMCgXGJ4VNdePvn89HH3tIZ-gYsQ7Vq_YCiYIATEY,3843
|
|
164
159
|
tpu_inference/runner/multimodal_manager.py,sha256=azEPdHOwz8CN11MQmorGdtrCLbFaTCxdWyuEsZTzjYM,9778
|
|
165
|
-
tpu_inference/runner/persistent_batch_manager.py,sha256=
|
|
160
|
+
tpu_inference/runner/persistent_batch_manager.py,sha256=Otu67vOTf1_HKAMZgPDDHlRvvZ3YVJdz-QderH4qOII,13263
|
|
166
161
|
tpu_inference/runner/speculative_decoding_manager.py,sha256=I3FDWKh2dn6nV8LgTGfCTwMKYnxQsTPpBIrmaJngXHs,10215
|
|
167
|
-
tpu_inference/runner/structured_decoding_manager.py,sha256=
|
|
168
|
-
tpu_inference/runner/tpu_runner.py,sha256=
|
|
169
|
-
tpu_inference/runner/utils.py,sha256=
|
|
162
|
+
tpu_inference/runner/structured_decoding_manager.py,sha256=gZQKQUFxh6xYYH9eGTdbguqk8hc2WwTrIdMMuCcbymE,3573
|
|
163
|
+
tpu_inference/runner/tpu_runner.py,sha256=NBDKfSGShHmYpudrtGfo1hnVSQTcLpZV_nPiXEo7JPQ,79439
|
|
164
|
+
tpu_inference/runner/utils.py,sha256=lKqL5nxGTk7ufzJRNdp4udn2bPu3jIX52W7akXgSrHc,17133
|
|
170
165
|
tpu_inference/spec_decode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
171
166
|
tpu_inference/spec_decode/jax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
172
|
-
tpu_inference/spec_decode/jax/eagle3.py,sha256=
|
|
167
|
+
tpu_inference/spec_decode/jax/eagle3.py,sha256=FxP0uWeQlHlgCpt1nY3FUd4lKlegKJljHyc05jJucaQ,19104
|
|
173
168
|
tpu_inference/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
174
|
-
tpu_inference/worker/tpu_worker.py,sha256=
|
|
175
|
-
tpu_inference-0.11.1.
|
|
176
|
-
tpu_inference-0.11.1.
|
|
177
|
-
tpu_inference-0.11.1.
|
|
178
|
-
tpu_inference-0.11.1.
|
|
179
|
-
tpu_inference-0.11.1.
|
|
169
|
+
tpu_inference/worker/tpu_worker.py,sha256=LnZcSNxdhh0NkoWXxS5bZ0bsTMduSANehy2wELAaVsY,20672
|
|
170
|
+
tpu_inference-0.11.1.dev202512030818.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
171
|
+
tpu_inference-0.11.1.dev202512030818.dist-info/METADATA,sha256=oLzYFTCTvHDQLfyWoc8qV4IMYCoLRTiHECf08oT_bFA,5517
|
|
172
|
+
tpu_inference-0.11.1.dev202512030818.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
173
|
+
tpu_inference-0.11.1.dev202512030818.dist-info/top_level.txt,sha256=gb1hRIQ3DOawUfVzvPL2E__2KPIl9I0vb5r0xcRBGYQ,20
|
|
174
|
+
tpu_inference-0.11.1.dev202512030818.dist-info/RECORD,,
|
tpu_inference/mock/__init__.py
DELETED
|
File without changes
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
from dataclasses import dataclass, field
|
|
2
|
-
from typing import Any, List, Mapping
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
@dataclass
|
|
6
|
-
class ModelConfig():
|
|
7
|
-
max_model_len: int = 2048
|
|
8
|
-
max_prefill_len: int = 1024
|
|
9
|
-
prefill_batch_size: int = 1
|
|
10
|
-
decode_batch_size: int = 1
|
|
11
|
-
block_size: int = 16
|
|
12
|
-
num_layers: int = 32
|
|
13
|
-
num_kv_heads: int = 32
|
|
14
|
-
head_dim: int = 128
|
|
15
|
-
vocab_size: int = 32000
|
|
16
|
-
model: str = "llama3"
|
|
17
|
-
hf_config: str = ""
|
|
18
|
-
architectures: List[str] = field(default_factory=list)
|
|
19
|
-
override_generation_config: dict[str, Any] = field(default_factory=dict)
|
|
20
|
-
hf_overrides: dict[str, Any] = field(default_factory=dict)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
@dataclass
|
|
24
|
-
class VllmConfig():
|
|
25
|
-
additional_config: Mapping[str, Any] = field(default_factory=dict)
|
|
26
|
-
# Set default max_model_len to turn off warnings.
|
|
27
|
-
model_config: ModelConfig = field(
|
|
28
|
-
default_factory=lambda: ModelConfig(max_model_len=1024))
|