tpu-inference 0.11.1.dev202511130813__py3-none-any.whl → 0.11.1.dev202511180814__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tpu-inference might be problematic. Click here for more details.
- tests/test_envs.py +182 -0
- tests/test_utils.py +23 -14
- tpu_inference/core/core_tpu.py +17 -9
- tpu_inference/executors/ray_distributed_executor.py +24 -11
- tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py +33 -10
- tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes.py +7 -0
- tpu_inference/layers/{jax → common}/attention_interface.py +1 -1
- tpu_inference/layers/common/quant_methods.py +8 -0
- tpu_inference/layers/jax/attention/attention.py +1 -1
- tpu_inference/layers/jax/sample/rejection_sampler.py +1 -1
- tpu_inference/layers/jax/sample/sampling.py +2 -2
- tpu_inference/layers/vllm/attention.py +1 -1
- tpu_inference/layers/vllm/quantization/__init__.py +7 -3
- tpu_inference/layers/vllm/quantization/awq.py +4 -3
- tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py +4 -2
- tpu_inference/layers/vllm/quantization/mxfp4.py +266 -0
- tpu_inference/layers/vllm/quantization/unquantized.py +4 -3
- tpu_inference/models/common/model_loader.py +3 -2
- tpu_inference/models/jax/llama3.py +2 -2
- tpu_inference/models/jax/phi3.py +1 -1
- tpu_inference/models/jax/qwen2.py +1 -1
- tpu_inference/models/jax/qwen2_5_vl.py +2 -2
- tpu_inference/models/jax/qwen3.py +1 -1
- tpu_inference/models/vllm/vllm_model_wrapper.py +22 -10
- tpu_inference/platforms/tpu_platform.py +12 -5
- tpu_inference/runner/compilation_manager.py +4 -2
- tpu_inference/runner/kv_cache.py +1 -1
- tpu_inference/runner/tpu_runner.py +31 -7
- tpu_inference/utils.py +2 -2
- tpu_inference/worker/tpu_worker.py +1 -1
- {tpu_inference-0.11.1.dev202511130813.dist-info → tpu_inference-0.11.1.dev202511180814.dist-info}/METADATA +1 -1
- {tpu_inference-0.11.1.dev202511130813.dist-info → tpu_inference-0.11.1.dev202511180814.dist-info}/RECORD +37 -34
- /tpu_inference/layers/{jax → common}/binary_search.py +0 -0
- /tpu_inference/layers/{jax → common}/sharding.py +0 -0
- {tpu_inference-0.11.1.dev202511130813.dist-info → tpu_inference-0.11.1.dev202511180814.dist-info}/WHEEL +0 -0
- {tpu_inference-0.11.1.dev202511130813.dist-info → tpu_inference-0.11.1.dev202511180814.dist-info}/licenses/LICENSE +0 -0
- {tpu_inference-0.11.1.dev202511130813.dist-info → tpu_inference-0.11.1.dev202511180814.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
tests/test_base.py,sha256=Ct5WFRMHL7IHEIxk8FrzAvO8m0xFuDpzDBKkAKKAL2Q,7341
|
|
3
|
+
tests/test_envs.py,sha256=Woyfp_d5HS-uTGo4_u9dYlBbgmhfIEoFb-Rx_k7YXD4,6298
|
|
3
4
|
tests/test_quantization.py,sha256=IT5ASyS1uuWcxc22kRtBcA-V4j3Z3hb7pMztm3GOlBs,34445
|
|
4
5
|
tests/test_tpu_info.py,sha256=ZrwlMsp8ffITkS_b8Q1t_QG-a-WVAd4NUcjHhGibcsI,4670
|
|
5
|
-
tests/test_utils.py,sha256=
|
|
6
|
+
tests/test_utils.py,sha256=Mta5ZzYCgRAh1-BjcOvvx9iQ9DnnXLps7oDHxVQp2yE,8236
|
|
6
7
|
tests/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
8
|
tests/core/test_core_tpu.py,sha256=r496rk1eOsK_F4nvm9zprl_T-RcO6eCUb7LuVReOZno,21413
|
|
8
9
|
tests/core/test_disagg_executor.py,sha256=QdE2YZs08EyDDCmSjhiXkXqQ9BJTgO6csr_E1xkkfSg,2256
|
|
@@ -28,9 +29,9 @@ tpu_inference/env_override.py,sha256=pmL7lfs_rGCP92ya3wuWuudsCYeOMZ6tFZY82A4KkQc
|
|
|
28
29
|
tpu_inference/envs.py,sha256=MTT_Pdtd6cAcciYjv1OekEmvspaq3SYL0oR_jDkQ_aE,3948
|
|
29
30
|
tpu_inference/logger.py,sha256=HQCz7NefmbturuhOC7-3Ixbtcdgoz4g9FHh2RB6o8cc,334
|
|
30
31
|
tpu_inference/tpu_info.py,sha256=9UohshkndR6dZpGWpWXfTD4qvIVdVgHf0yOoSEkLTrw,2276
|
|
31
|
-
tpu_inference/utils.py,sha256=
|
|
32
|
+
tpu_inference/utils.py,sha256=iGPY147jP_8AKMu3g7vYTndjJJiOrK_4opA0JWtws5Q,10068
|
|
32
33
|
tpu_inference/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
|
-
tpu_inference/core/core_tpu.py,sha256=
|
|
34
|
+
tpu_inference/core/core_tpu.py,sha256=WDD3koE_j1QhWS2BbMA2aQOZayPZm4tYPvzL4YCX2jY,33294
|
|
34
35
|
tpu_inference/core/disagg_executor.py,sha256=HZpgYMVxRxm0RQxO4l8IDYBWJ6Z3Tac6xavc5otcirc,4657
|
|
35
36
|
tpu_inference/core/disagg_utils.py,sha256=ufWNFWQ5n4YnZpPOtoReHlYo4dlN7AbIqCyqS4an0t4,1572
|
|
36
37
|
tpu_inference/core/sched/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -40,7 +41,7 @@ tpu_inference/distributed/jax_parallel_state.py,sha256=5_xCwcL03lFPUoSO_OP7hIVKp
|
|
|
40
41
|
tpu_inference/distributed/tpu_connector.py,sha256=Zah46Sm5iOuh72SzXw69NxMc0MLnqsLEpe2BfDhpnqA,29731
|
|
41
42
|
tpu_inference/distributed/utils.py,sha256=RwFQi8G4TzN1g9RjQu0pb5JxSc_jhoIZVsFJo0uHjxo,1513
|
|
42
43
|
tpu_inference/executors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
|
-
tpu_inference/executors/ray_distributed_executor.py,sha256=
|
|
44
|
+
tpu_inference/executors/ray_distributed_executor.py,sha256=ZMuVUwmroi7UUZs3u67OsOwUIkxNDz9IszUPG20F18E,15904
|
|
44
45
|
tpu_inference/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
46
|
tpu_inference/experimental/llama3_jax_stashed.py,sha256=YK1oSIfto9ALo-HB45XfSrbq9XgVbE4m2C-9zRwmSzI,10913
|
|
46
47
|
tpu_inference/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -67,26 +68,27 @@ tpu_inference/kernels/ragged_paged_attention/v2/ragged_kv_cache_update.py,sha256
|
|
|
67
68
|
tpu_inference/kernels/ragged_paged_attention/v2/tuned_block_sizes.py,sha256=mw80bXBGenroGdrITV0F_EaI2s-Z9KWwqU9WodvJg14,97919
|
|
68
69
|
tpu_inference/kernels/ragged_paged_attention/v3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
69
70
|
tpu_inference/kernels/ragged_paged_attention/v3/kernel.py,sha256=tlP6121yfXaukx_RQroHlHcZnbKPyyum0lAcvT0B_Pk,56132
|
|
70
|
-
tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py,sha256=
|
|
71
|
-
tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes.py,sha256=
|
|
71
|
+
tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py,sha256=pD1Pte3neoLAxE3I3-VyV_4FuqgCHeAHGzEjMVt0MMk,56004
|
|
72
|
+
tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes.py,sha256=k3LwduhZO85cJ-pSgnGN0c2Nn8eNeQq4eA94KUXJzMw,142198
|
|
72
73
|
tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes_hd64.py,sha256=P3_ivi8iUz5QMU_3pgpl4Bkbmn0q0NpDtVJX39haRQA,11208
|
|
73
74
|
tpu_inference/kernels/ragged_paged_attention/v3/util.py,sha256=1N_ozjKboDYLteFJndWoLXNudj2z53rGXMkELa5Z9tY,1102
|
|
74
75
|
tpu_inference/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
75
76
|
tpu_inference/layers/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
|
+
tpu_inference/layers/common/attention_interface.py,sha256=CImMS8tuWgvaRY9YbGS3pY7OBnzeJ4Jla7LRFb4Xoa4,13224
|
|
76
78
|
tpu_inference/layers/common/attention_metadata.py,sha256=St8ZatbY1D7xQACKJH459jMgp3oTP3AQ36mi9FZdrPU,850
|
|
79
|
+
tpu_inference/layers/common/binary_search.py,sha256=ZQi-z1wG6WTcfVQXeTGOZokX4K1DSf9kCzqfrhEU8lk,12320
|
|
80
|
+
tpu_inference/layers/common/quant_methods.py,sha256=mQSxZ44-QQtm22C_8ViejnP1cP2Dv6yc2YaP6oMKJeQ,185
|
|
81
|
+
tpu_inference/layers/common/sharding.py,sha256=wBqdkXZSWfnnH8pkJtyW2DSqmAe_V4Vxi0iMPaXq0Z0,25185
|
|
77
82
|
tpu_inference/layers/jax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
78
|
-
tpu_inference/layers/jax/attention_interface.py,sha256=1jlvSZWaP6DuPVtb1W_KPw4-Qi68BikOBNLLcpygupY,13221
|
|
79
83
|
tpu_inference/layers/jax/base.py,sha256=Vhts6ZMwNCZ8LbnEXeB0rl3nHdS5hDJWX7HEa7Fl7yE,5775
|
|
80
|
-
tpu_inference/layers/jax/binary_search.py,sha256=ZQi-z1wG6WTcfVQXeTGOZokX4K1DSf9kCzqfrhEU8lk,12320
|
|
81
84
|
tpu_inference/layers/jax/constants.py,sha256=NcYg0zAf3ClfP7YMYdYu_F1GngOzZaIxIAHBZDunKw4,2755
|
|
82
85
|
tpu_inference/layers/jax/layers.py,sha256=yv_lC2tbJuzVL-OaXYooX82Ys8hWZATeH9M78coJ3VI,10633
|
|
83
86
|
tpu_inference/layers/jax/misc.py,sha256=znKv1Nuq_LgYpaIu0qlzUVDgQWnjjG7aqPJGM8kuwcw,566
|
|
84
87
|
tpu_inference/layers/jax/rope.py,sha256=i2E7pRLWgOaFLbeo8_phZwKQWJW7ohAyl69E2V2Mc2U,11349
|
|
85
88
|
tpu_inference/layers/jax/rope_interface.py,sha256=X0SruXizlCHGnssFujC1pL07UC4Vsp7-gdBy_Q7JZhI,8375
|
|
86
|
-
tpu_inference/layers/jax/sharding.py,sha256=wBqdkXZSWfnnH8pkJtyW2DSqmAe_V4Vxi0iMPaXq0Z0,25185
|
|
87
89
|
tpu_inference/layers/jax/transformer_block.py,sha256=ufv-yfVDmRP_Ynrx3UX9xj-x0PkNw_tQ-0N0eYf4i7M,3917
|
|
88
90
|
tpu_inference/layers/jax/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
89
|
-
tpu_inference/layers/jax/attention/attention.py,sha256=
|
|
91
|
+
tpu_inference/layers/jax/attention/attention.py,sha256=DJFDkpQc9SDD156wVPFw3r2XaBgb44QNJ8OcdONaF5g,10085
|
|
90
92
|
tpu_inference/layers/jax/attention/deepseek_v3_attention.py,sha256=YlagoBMwINv2KRH1dr4oEcH_cQ9QMPB55nO2FQZsWs0,14010
|
|
91
93
|
tpu_inference/layers/jax/attention/gpt_oss_attention.py,sha256=rkrEv4aNZxtAGcXd1HXHUxhNeDNAd9nWTEZOKWSI8cA,8725
|
|
92
94
|
tpu_inference/layers/jax/attention/llama4_attention.py,sha256=VvUmfBxQEbHf3F2BrcYDUnq5abj7CSDYeRsNx_eVAh0,6162
|
|
@@ -95,20 +97,21 @@ tpu_inference/layers/jax/moe/deepseek_v3_moe.py,sha256=Q6CuwwiZtWYm6iUee1wJoDJrw
|
|
|
95
97
|
tpu_inference/layers/jax/moe/gpt_oss_moe.py,sha256=Rx5b1jg2XMm7Xx9hrjgvyhscaJ_zGbVMHmeEiLh7kIQ,6196
|
|
96
98
|
tpu_inference/layers/jax/moe/moe.py,sha256=cA8R1rjbBwNEoNlsPWjeIBB9nvaRDwlEdwQTVg6lTpY,8762
|
|
97
99
|
tpu_inference/layers/jax/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
98
|
-
tpu_inference/layers/jax/sample/rejection_sampler.py,sha256=
|
|
99
|
-
tpu_inference/layers/jax/sample/sampling.py,sha256=
|
|
100
|
+
tpu_inference/layers/jax/sample/rejection_sampler.py,sha256=nI5s0E73xkqDIu2hTljIXt23B1Q-gRnC1myoQpGDJrQ,20426
|
|
101
|
+
tpu_inference/layers/jax/sample/sampling.py,sha256=C30KgmdOVSaagvHhbfLgVJtVQmJo86CbHPa4h36Vn70,3314
|
|
100
102
|
tpu_inference/layers/jax/sample/sampling_metadata.py,sha256=Gd835LNWfGM0NRQBVBqEv0nPwt5q9F4AdFym0CUS1fw,2561
|
|
101
103
|
tpu_inference/layers/vllm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
102
|
-
tpu_inference/layers/vllm/attention.py,sha256=
|
|
104
|
+
tpu_inference/layers/vllm/attention.py,sha256=wbJpcgqEAuIirv5PIULbiP-ggMKjmTanbB7Dg0BVYv4,7366
|
|
103
105
|
tpu_inference/layers/vllm/fused_moe.py,sha256=XZt2CPUz00qZzDcyfBFz6buhVzmGL1amHalHJALl9zw,18945
|
|
104
106
|
tpu_inference/layers/vllm/linear_common.py,sha256=_YlJtbdaYcck_j-gFLos_k0ycktVWxT8Qo57tR2YqJ8,7749
|
|
105
107
|
tpu_inference/layers/vllm/sharding.py,sha256=WTx1tF_7R99AdyE-lL7HQJ378hAafeI-JVRsugAvwn4,9177
|
|
106
|
-
tpu_inference/layers/vllm/quantization/__init__.py,sha256=
|
|
107
|
-
tpu_inference/layers/vllm/quantization/awq.py,sha256
|
|
108
|
+
tpu_inference/layers/vllm/quantization/__init__.py,sha256=SEppGayBzzQ5tsXLSy99aqilkAawQwYxnv2alCg6-ZU,1777
|
|
109
|
+
tpu_inference/layers/vllm/quantization/awq.py,sha256=-8ZmjGvSKJB6_JuwSctNWt8xHWq4VSvK_AK9iahlgCo,8495
|
|
108
110
|
tpu_inference/layers/vllm/quantization/common.py,sha256=wm3pge6XMTMsLK7_SSdgBP0PvQzz-1mrqN2I6xMqzrc,4218
|
|
109
|
-
tpu_inference/layers/vllm/quantization/
|
|
111
|
+
tpu_inference/layers/vllm/quantization/mxfp4.py,sha256=KwGoqIiPkd6FplGuYAKi4uX5A8MPlZqq99MVPchXyi4,11561
|
|
112
|
+
tpu_inference/layers/vllm/quantization/unquantized.py,sha256=Q1v1ZbSIDmaoOg97Ehv6rA5CnSf6nTP40xDBMmHHeLw,15054
|
|
110
113
|
tpu_inference/layers/vllm/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
111
|
-
tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py,sha256=
|
|
114
|
+
tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py,sha256=6idEyy3e849fZ1UeNvc9eSHYX7e6qvohrJa_d_D9MBk,5285
|
|
112
115
|
tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=FM901QhyhJRC8CuMeICzCVVERvBHbhruRxYW0EQ570s,8820
|
|
113
116
|
tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
114
117
|
tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py,sha256=6sQvsxiWdi5Vte8V9vrQ2abaqGqWpq-mtzU7lGAo-ac,8759
|
|
@@ -123,18 +126,18 @@ tpu_inference/mock/vllm_logger.py,sha256=vUGnN5nKT--ZvU15YCzODUM_FGiXKhcrrjDGjeN
|
|
|
123
126
|
tpu_inference/mock/vllm_logging_utils.py,sha256=TEUmKj3xHiLzHBnFqAujcxH0t2hBQ04sUaho2RyORnk,486
|
|
124
127
|
tpu_inference/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
125
128
|
tpu_inference/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
126
|
-
tpu_inference/models/common/model_loader.py,sha256=
|
|
129
|
+
tpu_inference/models/common/model_loader.py,sha256=VgxM2OODb0-69dexv4aNJ4g24Nrx5sj_ra4XStkhl14,18289
|
|
127
130
|
tpu_inference/models/jax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
128
131
|
tpu_inference/models/jax/deepseek_v3.py,sha256=SKOHVEC-_2NLxBnzBzbu5tu0d6FTlAEiI1EefGaO2QE,40047
|
|
129
132
|
tpu_inference/models/jax/gpt_oss.py,sha256=Vw4LRB5Kp6hbA2hjZGFS8kiEqOCjf881XH2JNtu2S1I,20924
|
|
130
133
|
tpu_inference/models/jax/jax_intermediate_tensor.py,sha256=Pxu1PCV5LN5X58aYVkPiohcXZIeKVim2oqvrS_cVgw4,2604
|
|
131
|
-
tpu_inference/models/jax/llama3.py,sha256=
|
|
134
|
+
tpu_inference/models/jax/llama3.py,sha256=w99DAfipGS9HyX2ZRwqyYLxC3oa0ew5eEQ6EXlMMf18,13426
|
|
132
135
|
tpu_inference/models/jax/llama4.py,sha256=wf2Sp2iYViaYD5rSfv3_ryO6gYuYM5XaOyvghaP4OCY,29631
|
|
133
136
|
tpu_inference/models/jax/llama_eagle3.py,sha256=STUkAK6XEA7JM3i_Lx36-t5BhkAGeW_xYiq3zYhHP1A,12297
|
|
134
|
-
tpu_inference/models/jax/phi3.py,sha256=
|
|
135
|
-
tpu_inference/models/jax/qwen2.py,sha256=
|
|
136
|
-
tpu_inference/models/jax/qwen2_5_vl.py,sha256=
|
|
137
|
-
tpu_inference/models/jax/qwen3.py,sha256=
|
|
137
|
+
tpu_inference/models/jax/phi3.py,sha256=TpP3Nvr1myW_Qd8xNrLP1VmXtq7BuTcWNayJitskFd0,13579
|
|
138
|
+
tpu_inference/models/jax/qwen2.py,sha256=P_x_Qygf-nanmF8Uufk4c-qLNxP4RAk4yuqSF8VwbxE,13357
|
|
139
|
+
tpu_inference/models/jax/qwen2_5_vl.py,sha256=fvMgM5GfUn5EECaMbR0z37mmbCHphAT1AvWPvGkhVn4,43942
|
|
140
|
+
tpu_inference/models/jax/qwen3.py,sha256=lr3TIIQKmNgWFDFxwuPsVOypqBijkqrpnNCopVg4iBo,10997
|
|
138
141
|
tpu_inference/models/jax/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
139
142
|
tpu_inference/models/jax/utils/file_utils.py,sha256=NOuSC3YFnZpf3CZgYdghbbiNYJt42zgjlEYbOZIVct4,2840
|
|
140
143
|
tpu_inference/models/jax/utils/multi_modal_utils.py,sha256=rrIrQWidkUnGilBHKNpdYh7_2BkvnAaqanXjC81GNcg,6156
|
|
@@ -147,30 +150,30 @@ tpu_inference/models/jax/utils/quantization/configs/fp8_default.yaml,sha256=b7Sy
|
|
|
147
150
|
tpu_inference/models/jax/utils/quantization/configs/int8_all_modules_w_only.yaml,sha256=0Qwij71zj9k6rmrUNd8Q5df9YYfkoJ1ZkgMAHxQy81k,128
|
|
148
151
|
tpu_inference/models/jax/utils/quantization/configs/int8_default.yaml,sha256=lGec0UwwxmNPNgKPSsTsCMSXNJjhw507KMtM2NsSCMw,152
|
|
149
152
|
tpu_inference/models/vllm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
150
|
-
tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=
|
|
153
|
+
tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=o3oJ7Uhu-vSJEFHHifF8e0Q7dULRKJ2GRsT1qAN6PWY,12099
|
|
151
154
|
tpu_inference/models/vllm/vllm_model_wrapper_context.py,sha256=yxlJHPmRQIAwlb1MmHK3xfXokgIkJ-evNU4PgyoJUdg,1187
|
|
152
155
|
tpu_inference/platforms/__init__.py,sha256=lQCrKddS_GcGpCbeogvz9zOZD1mQw5bBsiw8On46qFQ,74
|
|
153
|
-
tpu_inference/platforms/tpu_platform.py,sha256=
|
|
156
|
+
tpu_inference/platforms/tpu_platform.py,sha256=AYFr1Q7VUN76wcdgOe_wZuVIHgp2U8isBJ3iHrYqt0M,10530
|
|
154
157
|
tpu_inference/runner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
155
158
|
tpu_inference/runner/block_table.py,sha256=K3Ic8EgPM08d_C5nEN60mxoRydlaQWySAemf_8Q_qVw,4175
|
|
156
|
-
tpu_inference/runner/compilation_manager.py,sha256=
|
|
159
|
+
tpu_inference/runner/compilation_manager.py,sha256=yIsonouB5G0-fyVtAKuyyRXaMGNFwnX8D7q6ppQYgUI,36318
|
|
157
160
|
tpu_inference/runner/input_batch.py,sha256=bx221NX2IOWzrtopss-B-2ZKW4y-U6nQpG09PjpUziw,18273
|
|
158
|
-
tpu_inference/runner/kv_cache.py,sha256=
|
|
161
|
+
tpu_inference/runner/kv_cache.py,sha256=F4dzW2d53xuxkFUn0oKzwE6VklGUeVm-QM19NVfIQDU,4577
|
|
159
162
|
tpu_inference/runner/kv_cache_manager.py,sha256=CJxXtdWuewJqcTBMoR70_Uvwxjtc3cK2jxe1KpI9kQc,22152
|
|
160
163
|
tpu_inference/runner/lora_utils.py,sha256=B4xMCgXGJ4VNdePvn89HH3tIZ-gYsQ7Vq_YCiYIATEY,3843
|
|
161
164
|
tpu_inference/runner/multimodal_manager.py,sha256=azEPdHOwz8CN11MQmorGdtrCLbFaTCxdWyuEsZTzjYM,9778
|
|
162
165
|
tpu_inference/runner/persistent_batch_manager.py,sha256=KERSfKy6XjMejnbtPGI3hzoYAHJLeCxmpZVYPqBCago,11156
|
|
163
166
|
tpu_inference/runner/speculative_decoding_manager.py,sha256=I3FDWKh2dn6nV8LgTGfCTwMKYnxQsTPpBIrmaJngXHs,10215
|
|
164
167
|
tpu_inference/runner/structured_decoding_manager.py,sha256=Y0ERPhj4olFh6Y2TxP0R1_4UIJwy7nemYA-h63YIR2U,3622
|
|
165
|
-
tpu_inference/runner/tpu_runner.py,sha256=
|
|
168
|
+
tpu_inference/runner/tpu_runner.py,sha256=3SZYn0CBA4LOaTO3GdQOxKx3HKmVcNmUEeSyzSAGyFY,73320
|
|
166
169
|
tpu_inference/runner/utils.py,sha256=ZnWUoNo-7INeB0mdXti1jwUOdbmxyExznOs-crRTQLk,17126
|
|
167
170
|
tpu_inference/spec_decode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
168
171
|
tpu_inference/spec_decode/jax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
169
172
|
tpu_inference/spec_decode/jax/eagle3.py,sha256=A1dt-dmBttpy-5DGcL4noEDCB0OGP8Xo6MXqgJvWIo8,16593
|
|
170
173
|
tpu_inference/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
171
|
-
tpu_inference/worker/tpu_worker.py,sha256=
|
|
172
|
-
tpu_inference-0.11.1.
|
|
173
|
-
tpu_inference-0.11.1.
|
|
174
|
-
tpu_inference-0.11.1.
|
|
175
|
-
tpu_inference-0.11.1.
|
|
176
|
-
tpu_inference-0.11.1.
|
|
174
|
+
tpu_inference/worker/tpu_worker.py,sha256=0ZguK2BtIQjQSvyUTcUH9ENBrxt09w3CbgPoDY13Eok,14210
|
|
175
|
+
tpu_inference-0.11.1.dev202511180814.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
176
|
+
tpu_inference-0.11.1.dev202511180814.dist-info/METADATA,sha256=6dHy_ByQ0ihDNFuqyb-ZXTFczvQ8Ia54zBNTKaUPhSk,5465
|
|
177
|
+
tpu_inference-0.11.1.dev202511180814.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
178
|
+
tpu_inference-0.11.1.dev202511180814.dist-info/top_level.txt,sha256=gb1hRIQ3DOawUfVzvPL2E__2KPIl9I0vb5r0xcRBGYQ,20
|
|
179
|
+
tpu_inference-0.11.1.dev202511180814.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|