tpu-inference 0.12.0.dev20251222__py3-none-any.whl → 0.12.0.dev20251224__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/core/test_dp_scheduler.py +128 -71
- tests/e2e/test_data_parallel.py +176 -280
- tests/e2e/test_hybrid_kvcache.py +219 -0
- tests/e2e/test_speculative_decoding.py +26 -6
- tests/layers/jax/test_qwix.py +1 -1
- tests/layers/vllm/test_compressed_tensors_w8a8_fp8.py +36 -21
- tests/layers/vllm/test_compressed_tensors_w8a8_int8.py +36 -21
- tests/layers/vllm/test_mxfp4.py +25 -10
- tests/layers/vllm/test_unquantized.py +61 -31
- tests/layers/vllm/utils.py +19 -4
- tests/models/common/test_model_loader.py +2 -2
- tests/models/jax/test_qwen2_5_vl.py +10 -11
- tests/runner/test_multimodal_manager.py +3 -3
- tests/runner/test_tpu_runner.py +67 -8
- tests/runner/test_tpu_runner_dp.py +66 -0
- tpu_inference/core/sched/dp_scheduler.py +65 -40
- tpu_inference/kernels/mla/v1/kernel.py +7 -26
- tpu_inference/layers/common/sharding.py +8 -3
- tpu_inference/layers/jax/attention/deepseek_v3_attention.py +3 -3
- tpu_inference/layers/jax/attention/gpt_oss_attention.py +3 -3
- tpu_inference/layers/jax/attention/llama4_attention.py +3 -4
- tpu_inference/layers/jax/sample/sampling.py +1 -1
- tpu_inference/layers/vllm/fused_moe.py +51 -47
- tpu_inference/layers/vllm/quantization/common.py +14 -13
- tpu_inference/layers/vllm/quantization/mxfp4.py +21 -7
- tpu_inference/layers/vllm/quantization/unquantized.py +19 -7
- tpu_inference/layers/vllm/sharding.py +7 -4
- tpu_inference/models/common/model_loader.py +11 -14
- tpu_inference/models/jax/llama3.py +13 -10
- tpu_inference/models/jax/llama_guard_4.py +1 -1
- tpu_inference/models/jax/qwen2.py +3 -2
- tpu_inference/models/jax/qwen2_5_vl.py +4 -4
- tpu_inference/models/jax/utils/multi_modal_utils.py +4 -4
- tpu_inference/models/jax/utils/qwix/qwix_utils.py +3 -3
- tpu_inference/models/vllm/vllm_model_wrapper.py +5 -2
- tpu_inference/platforms/tpu_platform.py +7 -7
- tpu_inference/runner/compilation_manager.py +43 -33
- tpu_inference/runner/kv_cache_manager.py +1 -2
- tpu_inference/runner/multimodal_manager.py +1 -1
- tpu_inference/runner/tpu_runner.py +12 -9
- tpu_inference/utils.py +31 -30
- tpu_inference/worker/tpu_worker.py +5 -2
- {tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/METADATA +1 -1
- {tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/RECORD +47 -46
- {tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/WHEEL +0 -0
- {tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/licenses/LICENSE +0 -0
- {tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/top_level.txt +0 -0
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import functools
|
|
16
|
-
from typing import TYPE_CHECKING,
|
|
16
|
+
from typing import TYPE_CHECKING, List
|
|
17
17
|
|
|
18
18
|
import jax
|
|
19
19
|
import jax.numpy as jnp
|
|
@@ -212,7 +212,6 @@ class KVCacheManager:
|
|
|
212
212
|
# uniform page size.
|
|
213
213
|
representative_spec = kv_cache_config.kv_cache_groups[0].kv_cache_spec
|
|
214
214
|
page_size_bytes = representative_spec.page_size_bytes
|
|
215
|
-
self.runner.layer_name_to_kvcache_index: Dict[str, int] = {}
|
|
216
215
|
kv_caches = self.runner.kv_caches
|
|
217
216
|
num_blocks_list = []
|
|
218
217
|
for i, kv_cache_tensor in enumerate(kv_cache_config.kv_cache_tensors):
|
|
@@ -148,7 +148,7 @@ class MultiModalManager:
|
|
|
148
148
|
# 2. A list or tuple (length: num_items) of tensors, each of shape
|
|
149
149
|
# (feature_size, hidden_size) in case the feature size is dynamic
|
|
150
150
|
# depending on the input multimodal items.
|
|
151
|
-
curr_group_outputs = self.runner.
|
|
151
|
+
curr_group_outputs = self.runner.embed_multimodal_fn(
|
|
152
152
|
self.runner.state, image_grid_thw, **batched_mm_inputs)
|
|
153
153
|
|
|
154
154
|
sanity_check_mm_encoder_outputs(
|
|
@@ -282,6 +282,9 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
|
|
|
282
282
|
self._substitute_placeholder_token_fn = _substitute_placeholder_token
|
|
283
283
|
self.execute_model_state: ExecuteModelState | None = None
|
|
284
284
|
|
|
285
|
+
self.kv_caches: list[jax.Array] = []
|
|
286
|
+
self.layer_name_to_kvcache_index: dict[str, int] = {}
|
|
287
|
+
|
|
285
288
|
def _init_random(self):
|
|
286
289
|
if self.model_config.seed is None:
|
|
287
290
|
self.model_config.seed = 0
|
|
@@ -508,10 +511,10 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
|
|
|
508
511
|
multimodal_fns = multimodal_fns or {}
|
|
509
512
|
self.precompile_vision_encoder_fn = multimodal_fns.get(
|
|
510
513
|
"precompile_vision_encoder_fn", None)
|
|
511
|
-
self.
|
|
512
|
-
|
|
513
|
-
self.
|
|
514
|
-
|
|
514
|
+
self.embed_multimodal_fn = multimodal_fns.get("embed_multimodal_fn",
|
|
515
|
+
None)
|
|
516
|
+
self.embed_input_ids_fn = multimodal_fns.get("embed_input_ids_fn",
|
|
517
|
+
None)
|
|
515
518
|
self.get_mrope_input_positions_fn = multimodal_fns.get(
|
|
516
519
|
"get_mrope_input_positions_fn", None)
|
|
517
520
|
|
|
@@ -523,7 +526,7 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
|
|
|
523
526
|
jax.random.key(self.model_config.seed)).params()
|
|
524
527
|
self.is_multimodal_model = (
|
|
525
528
|
self.model_config.is_multimodal_model
|
|
526
|
-
and self.
|
|
529
|
+
and self.embed_multimodal_fn is not None and hasattr(
|
|
527
530
|
self.model_config.hf_config, "architectures"
|
|
528
531
|
) #TODO: Remove Llama Guard 4 specific condition once the LG4 Vision portion is implemented
|
|
529
532
|
and len(self.model_config.hf_config.architectures) >= 1
|
|
@@ -545,7 +548,6 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
|
|
|
545
548
|
self.topology_order_id = topology_order_id
|
|
546
549
|
self.kv_cache_config = kv_cache_config
|
|
547
550
|
self.use_hybrid_kvcache = len(kv_cache_config.kv_cache_groups) > 1
|
|
548
|
-
self.kv_caches = []
|
|
549
551
|
self.kv_cache_manager.initialize_kv_cache(kv_cache_config)
|
|
550
552
|
if has_kv_transfer_group():
|
|
551
553
|
get_kv_transfer_group().register_runner(self)
|
|
@@ -827,7 +829,7 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
|
|
|
827
829
|
sharding = None
|
|
828
830
|
if self.dp_size > 1:
|
|
829
831
|
sharding = NamedSharding(self.mesh,
|
|
830
|
-
PartitionSpec(ShardingAxisName.
|
|
832
|
+
PartitionSpec(ShardingAxisName.MLP_DATA))
|
|
831
833
|
|
|
832
834
|
tpu_sampling_metadata = TPUSupportedSamplingMetadata.from_input_batch(
|
|
833
835
|
self.mesh, self.input_batch, padded_num_reqs, sharding=sharding)
|
|
@@ -1390,7 +1392,8 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
|
|
|
1390
1392
|
self.mesh,
|
|
1391
1393
|
self.input_batch,
|
|
1392
1394
|
padded_num_reqs,
|
|
1393
|
-
sharding=
|
|
1395
|
+
sharding=NamedSharding(self.mesh,
|
|
1396
|
+
PartitionSpec(ShardingAxisName.MLP_DATA)),
|
|
1394
1397
|
)
|
|
1395
1398
|
if self.uses_mrope:
|
|
1396
1399
|
positions = mrope_positions
|
|
@@ -1680,7 +1683,7 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
|
|
|
1680
1683
|
def _get_input_ids_embeds(self, input_ids: jax.Array,
|
|
1681
1684
|
mm_embeds: list[jax.Array]):
|
|
1682
1685
|
if self.is_multimodal_model:
|
|
1683
|
-
inputs_embeds = self.
|
|
1686
|
+
inputs_embeds = self.embed_input_ids_fn(
|
|
1684
1687
|
self.state,
|
|
1685
1688
|
input_ids,
|
|
1686
1689
|
mm_embeds,
|
tpu_inference/utils.py
CHANGED
|
@@ -3,7 +3,7 @@ import time
|
|
|
3
3
|
from collections import defaultdict
|
|
4
4
|
from collections.abc import Sequence
|
|
5
5
|
from functools import wraps
|
|
6
|
-
from typing import Any, Callable, List, Tuple
|
|
6
|
+
from typing import Any, Callable, List, Tuple, Union
|
|
7
7
|
|
|
8
8
|
import jax
|
|
9
9
|
import jax.numpy as jnp
|
|
@@ -283,35 +283,6 @@ def get_hash_fn_by_name(hash_fn_name: str) -> Callable[[Any], bytes]:
|
|
|
283
283
|
return utils.hashing.get_hash_fn_by_name(hash_fn_name)
|
|
284
284
|
|
|
285
285
|
|
|
286
|
-
def quantize_kv(key: jax.Array, value: jax.Array,
|
|
287
|
-
kv_cache_quantized_dtype: jnp.dtype, k_scale: float,
|
|
288
|
-
v_scale: float) -> Tuple[jax.Array, jax.Array]:
|
|
289
|
-
"""
|
|
290
|
-
Quantize the key and value tensors.
|
|
291
|
-
|
|
292
|
-
Args:
|
|
293
|
-
key: The key tensor to quantize.
|
|
294
|
-
value: The value tensor to quantize.
|
|
295
|
-
kv_cache_quantized_dtype: The dtype to quantize the key and value tensors to.
|
|
296
|
-
q_scale: The scale to quantize the key and value tensors by.
|
|
297
|
-
k_scale: The scale to quantize the key tensor by.
|
|
298
|
-
v_scale: The scale to quantize the value tensor by.
|
|
299
|
-
|
|
300
|
-
Returns:
|
|
301
|
-
Tuple[jax.Array, jax.Array]: The quantized key and value tensors.
|
|
302
|
-
"""
|
|
303
|
-
dtype_info = jnp.finfo(kv_cache_quantized_dtype)
|
|
304
|
-
minval, maxval = float(dtype_info.min), float(dtype_info.max)
|
|
305
|
-
key = key.astype(jnp.float32) / k_scale
|
|
306
|
-
key = jnp.clip(key, minval, maxval)
|
|
307
|
-
key = key.astype(kv_cache_quantized_dtype)
|
|
308
|
-
value = value.astype(jnp.float32) / v_scale
|
|
309
|
-
value = jnp.clip(value, minval, maxval)
|
|
310
|
-
value = value.astype(kv_cache_quantized_dtype)
|
|
311
|
-
|
|
312
|
-
return key, value
|
|
313
|
-
|
|
314
|
-
|
|
315
286
|
def get_jax_dtype_from_str_dtype(str_dtype: str) -> jnp.dtype:
|
|
316
287
|
"""
|
|
317
288
|
Get the JAX dtype from a string dtype.
|
|
@@ -326,6 +297,36 @@ def get_jax_dtype_from_str_dtype(str_dtype: str) -> jnp.dtype:
|
|
|
326
297
|
return to_jax_dtype(str_dtype)
|
|
327
298
|
|
|
328
299
|
|
|
300
|
+
def get_mesh_shape_product(
|
|
301
|
+
mesh: Mesh,
|
|
302
|
+
axes: Union[str, list[str], None],
|
|
303
|
+
) -> int:
|
|
304
|
+
"""
|
|
305
|
+
Get the product of mesh dimensions for one or more axes.
|
|
306
|
+
|
|
307
|
+
Examples:
|
|
308
|
+
# Single axis (defaults to 1 if not present)
|
|
309
|
+
get_mesh_shape_product(mesh, "model")
|
|
310
|
+
|
|
311
|
+
# Multiple axes - computes product of their sizes
|
|
312
|
+
get_mesh_shape_product(mesh, ["model", "attn_dp"])
|
|
313
|
+
|
|
314
|
+
# None means no sharding on this dimension
|
|
315
|
+
get_mesh_shape_product(mesh, None) # returns 1
|
|
316
|
+
"""
|
|
317
|
+
if axes is None:
|
|
318
|
+
return 1
|
|
319
|
+
|
|
320
|
+
if isinstance(axes, str):
|
|
321
|
+
axes = [axes]
|
|
322
|
+
|
|
323
|
+
product = 1
|
|
324
|
+
for axis in axes:
|
|
325
|
+
product *= mesh.shape.get(axis, 1)
|
|
326
|
+
|
|
327
|
+
return product
|
|
328
|
+
|
|
329
|
+
|
|
329
330
|
def time_function(func):
|
|
330
331
|
"""
|
|
331
332
|
A decorator to measure the execution time of a function.
|
|
@@ -431,8 +431,11 @@ class TPUWorker:
|
|
|
431
431
|
) -> None:
|
|
432
432
|
"""Allocate GPU KV cache with the specified kv_cache_config."""
|
|
433
433
|
# Precompile functions with large vocab_size tensors before allocating KV cache to avoid OOM
|
|
434
|
-
|
|
435
|
-
|
|
434
|
+
if not (envs.SKIP_JAX_PRECOMPILE or
|
|
435
|
+
(hasattr(self.model_runner.model_config, "enforce_eager")
|
|
436
|
+
and self.model_runner.model_config.enforce_eager)):
|
|
437
|
+
self.model_runner.compilation_manager._precompile_sampling()
|
|
438
|
+
self.model_runner.compilation_manager._precompile_gather_logprobs()
|
|
436
439
|
self.model_runner.initialize_kv_cache(kv_cache_config,
|
|
437
440
|
self.topology_order_id)
|
|
438
441
|
|
{tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/RECORD
RENAMED
|
@@ -7,21 +7,22 @@ tests/core/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
|
7
7
|
tests/core/test_core_tpu.py,sha256=r496rk1eOsK_F4nvm9zprl_T-RcO6eCUb7LuVReOZno,21413
|
|
8
8
|
tests/core/test_disagg_executor.py,sha256=QdE2YZs08EyDDCmSjhiXkXqQ9BJTgO6csr_E1xkkfSg,2256
|
|
9
9
|
tests/core/test_disagg_utils.py,sha256=A5icdqkJlau2PHYAxHfHKuqrlEKXVJu2nm02XOrXjcc,2530
|
|
10
|
-
tests/core/test_dp_scheduler.py,sha256=
|
|
10
|
+
tests/core/test_dp_scheduler.py,sha256=m6ph_OH9tXz6AxNde8cIjptd1lwDVSCqIV2Ef-cNJFk,34253
|
|
11
11
|
tests/core/test_init.py,sha256=5BDDC-dmDtWEGaBPjQSiYJuMiwTBVRSDx9p7Cv8DKyI,2262
|
|
12
12
|
tests/distributed/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
13
13
|
tests/distributed/test_distributed_utils.py,sha256=YXKbSG9J72vCrU5mPiFf1ya-Yzc1BjeahdBmQVez8Wc,5031
|
|
14
14
|
tests/distributed/test_tpu_connector.py,sha256=ajKeRUi3x29hQXfLrSlo6yDczpwZsg_mGt2vKBGRZdk,20538
|
|
15
15
|
tests/e2e/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
16
16
|
tests/e2e/test_async_scheduler.py,sha256=215xGuyTEBSOe-c1l48TIjrCqhbVFZY3m5p3q5mU7jA,6905
|
|
17
|
-
tests/e2e/test_data_parallel.py,sha256=
|
|
17
|
+
tests/e2e/test_data_parallel.py,sha256=KB-_BKic_iZyn4WbPWsUdVClinzd8g7PrQ0ui5B-nwo,10725
|
|
18
|
+
tests/e2e/test_hybrid_kvcache.py,sha256=Y7a-grjvAKBbp7vbQncVEQKGM1WxcwO0qa2o0opKiEI,8076
|
|
18
19
|
tests/e2e/test_local_disagg.py,sha256=xIjYI6RGA6bZk4dluklhfYBoJGbHkrSihSkJtPgpZv4,10434
|
|
19
20
|
tests/e2e/test_model_loader.py,sha256=DYlS420KXkNzeIijAf-0UQsYH0pOAGcXRl6P99PBiAc,9366
|
|
20
21
|
tests/e2e/test_multi_modal_inference.py,sha256=hVatj8Rra6XAekp6zBxRivQUcGiV8SimPph9cZ-TJyk,3896
|
|
21
22
|
tests/e2e/test_pipeline_parallel.py,sha256=VpxY9wgQj3-i0XooHZHdmHGdMS3ilmHbxu6ZfyQDUP0,9519
|
|
22
23
|
tests/e2e/test_runai_model_streamer_loader.py,sha256=MXUxKfKV7vVM_LI7-5hBV-wCswogPENkMPsREUjFu3I,3790
|
|
23
24
|
tests/e2e/test_sampling_params.py,sha256=ibLWtJfS35HughdOBtXD2IcyWPXoZA4R4KwXz-RzgOY,10683
|
|
24
|
-
tests/e2e/test_speculative_decoding.py,sha256=
|
|
25
|
+
tests/e2e/test_speculative_decoding.py,sha256=tj3VSJEi7r9aHjywZanlmfY4eS5Tfr5zPe9TH3PW5EY,9911
|
|
25
26
|
tests/e2e/test_structured_decoding.py,sha256=QYh9WjGrzm7syeLrGUawA6cOkWlQqVpTn7W6qwt65NY,1863
|
|
26
27
|
tests/executors/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
27
28
|
tests/executors/test_ray_distributed_executor.py,sha256=rMazBfirGsehEUXgpIPJkw0z7xO4cnK2kzcgxjFA6Bo,8435
|
|
@@ -44,7 +45,7 @@ tests/layers/common/test_attention_interface.py,sha256=ke6h-e8CP-FhNY_ojKCYwyHgY
|
|
|
44
45
|
tests/layers/common/test_quantization.py,sha256=JcwDrNTm6UlBSV3s3mwwvpxOjqBpZDJwnYYoj3DnS7A,5344
|
|
45
46
|
tests/layers/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
46
47
|
tests/layers/jax/test_layers.py,sha256=L1xh_wniBtlfudya_WRmHUWOhEno0i6ikKE1XiBtaZs,5010
|
|
47
|
-
tests/layers/jax/test_qwix.py,sha256=
|
|
48
|
+
tests/layers/jax/test_qwix.py,sha256=V8MpFKJb5_evs-Z4WeZ5SxA-KAyFD6Qrex7ExywLxmE,39744
|
|
48
49
|
tests/layers/jax/test_rope.py,sha256=0biwYRSRsKMaRHknc8v8Tfrt0bmJKQGeQLPqR_D04mM,3565
|
|
49
50
|
tests/layers/jax/test_sharding.py,sha256=Hk1MWhIluOKIBx7-O9fKa1n6fF3SW7UMYsRI9AGzp_0,5914
|
|
50
51
|
tests/layers/jax/test_transformer_block.py,sha256=Wpgowc0ZJnv1GUxcK-Op6CCYWjpqgUM0p3EANk-YWzc,5742
|
|
@@ -62,12 +63,12 @@ tests/layers/vllm/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs
|
|
|
62
63
|
tests/layers/vllm/test_attention.py,sha256=NSbeKIi4eQj9RLiHeT-aEDvvsiHYbD3rk4uXq3_5_X8,13193
|
|
63
64
|
tests/layers/vllm/test_awq.py,sha256=0aFURqn3zh0Ueytvfzy6SGon0gPRzk8Dn0DuCnpu_XQ,14479
|
|
64
65
|
tests/layers/vllm/test_compressed_tensors_moe.py,sha256=jlMZcbQWlgaLX4pAlEMjZbJ7a0NyjxIhqXUW5DGH6KM,7385
|
|
65
|
-
tests/layers/vllm/test_compressed_tensors_w8a8_fp8.py,sha256=
|
|
66
|
-
tests/layers/vllm/test_compressed_tensors_w8a8_int8.py,sha256=
|
|
66
|
+
tests/layers/vllm/test_compressed_tensors_w8a8_fp8.py,sha256=eTXSWaPcKgOEAyWt7Xqqkhd0fa2J3QFa3QJBRQjTsaY,15521
|
|
67
|
+
tests/layers/vllm/test_compressed_tensors_w8a8_int8.py,sha256=dCKHPos33MIwJnK96zlIUvJYN0PpRPXdyPyo-PnKH3U,16555
|
|
67
68
|
tests/layers/vllm/test_fp8.py,sha256=ZvFTg4Umgg6W2RwElkIZ_Rls_XZJ8sEW7yww2K3ztf4,666
|
|
68
|
-
tests/layers/vllm/test_mxfp4.py,sha256=
|
|
69
|
-
tests/layers/vllm/test_unquantized.py,sha256=
|
|
70
|
-
tests/layers/vllm/utils.py,sha256=
|
|
69
|
+
tests/layers/vllm/test_mxfp4.py,sha256=sFer788F7pbDUtB0yB6WU9Lh9kzUOaxXP6XXVsvQHrc,11625
|
|
70
|
+
tests/layers/vllm/test_unquantized.py,sha256=iqoqib_Rv2DdmKA2ub6T6cIT67PSTc3s7gpYzBTs_qI,24432
|
|
71
|
+
tests/layers/vllm/utils.py,sha256=Qk67IqSrSovhPlWmDGFBr5vwgwtG7kcUzy69-oPgR0A,3105
|
|
71
72
|
tests/lora/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
72
73
|
tests/lora/conftest.py,sha256=OI4gPV4vNOCcfE93ccmIWQHd8-Gp9c2yGVlaSnuT4Tg,1559
|
|
73
74
|
tests/lora/test_bgmv.py,sha256=B1HCjh27379vCxZsd8nKMBZ8lr1JamuuWDgYiALyn18,1934
|
|
@@ -77,7 +78,7 @@ tests/lora/test_lora_perf.py,sha256=zcZud9Hexx6wa9qX0IvnjKyDD-i61NdIQrVO31Yx3vU,
|
|
|
77
78
|
tests/lora/utils.py,sha256=rY0tDZEZe58ye4-ykwrTnsiWuLcaEG57N_Rua90bDXI,2726
|
|
78
79
|
tests/models/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
79
80
|
tests/models/common/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
80
|
-
tests/models/common/test_model_loader.py,sha256=
|
|
81
|
+
tests/models/common/test_model_loader.py,sha256=Sf-k_Kxdjkz-lS_0-ICfA4Yk2VXX33esP8PNG4B7FzA,17392
|
|
81
82
|
tests/models/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
82
83
|
tests/models/jax/test_deepseek_v3.py,sha256=9RY6ypfvPts3NOnvWu9n_T7pUjrvj_QY_saLOKpFg4c,16243
|
|
83
84
|
tests/models/jax/test_llama3.py,sha256=NYsT35yh9GzkYYcLcOo1BkBGGr14E89GtdCJJ6SFhI8,6610
|
|
@@ -85,7 +86,7 @@ tests/models/jax/test_llama4.py,sha256=MMQzTymnVUdWZ6XoOD8k9Q2ikmAk6tFSGB1C5DCi7
|
|
|
85
86
|
tests/models/jax/test_llama_eagle3.py,sha256=DCk1ae9SLJUrqyx7uvNOmpqAAM09xb0rYNOst-Leo_M,7777
|
|
86
87
|
tests/models/jax/test_llama_guard_4.py,sha256=w-8cKwuTRFyzDh2mxvAofrt5xUprZyqRm5DRVRamGwE,9322
|
|
87
88
|
tests/models/jax/test_qwen2.py,sha256=xylG-LmHBSy76V-Yl5KiAXogpZPM2w3Mx0E61Ud5sO4,6227
|
|
88
|
-
tests/models/jax/test_qwen2_5_vl.py,sha256=
|
|
89
|
+
tests/models/jax/test_qwen2_5_vl.py,sha256=PfB_gecAvXNrksxt8E56yP6d8ioZZWMoUIvh-OrbzJ4,26299
|
|
89
90
|
tests/models/jax/test_qwen3.py,sha256=NWLAZPwGIhZjW0OADk4JqU4ZPn8JGSGPwkbTQvKEc50,6021
|
|
90
91
|
tests/models/jax/test_weight_loading.py,sha256=RlmByQcjrsefybeNlS9wnL522be6CSR7YLcb7O5eZ-A,5205
|
|
91
92
|
tests/models/jax/utils/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
@@ -97,12 +98,12 @@ tests/runner/test_block_table.py,sha256=gFGF425mpWfOLjnQeQiG18TqFko8vpilJ3AiiiV1
|
|
|
97
98
|
tests/runner/test_input_batch.py,sha256=7nEkB00JrhaKCKf1ep28iedYbNbuqEdaQAxYqHaXThc,8198
|
|
98
99
|
tests/runner/test_kv_cache.py,sha256=TvxmJNI8lM0ZNllZonHySA8NCQZ7prBgNODpYEI787E,7394
|
|
99
100
|
tests/runner/test_kv_cache_manager.py,sha256=dYVWQamfGwqytnumfvjRt2r3n9BRBqcSbCXGWnw1SXs,22461
|
|
100
|
-
tests/runner/test_multimodal_manager.py,sha256=
|
|
101
|
+
tests/runner/test_multimodal_manager.py,sha256=8RbHHMvRuHg1Scc0b70tsr-tF2lfk8SZVx3InVgIryc,18591
|
|
101
102
|
tests/runner/test_persistent_batch_manager.py,sha256=EW6P-BtI4i59Clx-Lh84fU1GtDKF3Av2gtO-rCRYN_k,3148
|
|
102
103
|
tests/runner/test_speculative_decoding_manager.py,sha256=HgemtiBL_VhBheUgem3OpPj6yBK9vdJsL8VCABQdGXw,16093
|
|
103
104
|
tests/runner/test_structured_decoding_manager.py,sha256=pVX3z2TLR6SfBoEyRtv0BPajHbMVdcOAe4opMoxEpps,9802
|
|
104
|
-
tests/runner/test_tpu_runner.py,sha256=
|
|
105
|
-
tests/runner/test_tpu_runner_dp.py,sha256=
|
|
105
|
+
tests/runner/test_tpu_runner.py,sha256=H1RjGGvNPfNNhglbiUs9J2QsokXaDtnmmtdoYRvA5_8,11649
|
|
106
|
+
tests/runner/test_tpu_runner_dp.py,sha256=TAEmI-JaIodgYNjjjQAAQg-q0bSbeVON5ZZE2jngfOk,50851
|
|
106
107
|
tests/runner/test_tpu_runner_mesh.py,sha256=kDyjdnd0vO4GQrcOAPLr9TEYA49-qDFE4gHt9IL6wlk,8638
|
|
107
108
|
tests/runner/test_utils.py,sha256=_R2bnKttqgg7vfPXP0Qfx38mr-4UBm2UMIbuQFAwgWk,15442
|
|
108
109
|
tests/spec_decode/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
@@ -114,13 +115,13 @@ tpu_inference/env_override.py,sha256=pmL7lfs_rGCP92ya3wuWuudsCYeOMZ6tFZY82A4KkQc
|
|
|
114
115
|
tpu_inference/envs.py,sha256=A1Bdm5qiXhTdu-Q_yNzBpi79_nOJIDbdFF7MAMqmjxo,6662
|
|
115
116
|
tpu_inference/logger.py,sha256=HQCz7NefmbturuhOC7-3Ixbtcdgoz4g9FHh2RB6o8cc,334
|
|
116
117
|
tpu_inference/tpu_info.py,sha256=lty-ngN1uUvQLlFGkWa2u5eEb5anwmcv_uyI0S95PdY,2840
|
|
117
|
-
tpu_inference/utils.py,sha256=
|
|
118
|
+
tpu_inference/utils.py,sha256=0fQXcZJ4IiPGlNv_bLdkla5FeEEKEzyTsSDH-y47ouo,10641
|
|
118
119
|
tpu_inference/core/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
119
120
|
tpu_inference/core/core_tpu.py,sha256=WDD3koE_j1QhWS2BbMA2aQOZayPZm4tYPvzL4YCX2jY,33294
|
|
120
121
|
tpu_inference/core/disagg_executor.py,sha256=HZpgYMVxRxm0RQxO4l8IDYBWJ6Z3Tac6xavc5otcirc,4657
|
|
121
122
|
tpu_inference/core/disagg_utils.py,sha256=lv8MAVoAjtcmTaenUXVokg2q3d0tzsma86UiQlQ3omY,1492
|
|
122
123
|
tpu_inference/core/sched/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
123
|
-
tpu_inference/core/sched/dp_scheduler.py,sha256
|
|
124
|
+
tpu_inference/core/sched/dp_scheduler.py,sha256=-7d2zopJ5ZJFIJ8LbHsm_4bBBtP7qrim4XWVPDF6vrg,34960
|
|
124
125
|
tpu_inference/distributed/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
125
126
|
tpu_inference/distributed/jax_parallel_state.py,sha256=xMK0tEtblh37_LoHvp1-6qPI8AgX4HkE0ATuc7fdHKs,2798
|
|
126
127
|
tpu_inference/distributed/tpu_connector.py,sha256=3rR0y2P1MOOSM8nBfvl95ZQcVKMms3rL8zTdnxUmSms,29946
|
|
@@ -144,7 +145,7 @@ tpu_inference/kernels/megablox/common.py,sha256=CoJPNom6anJU9B4i05d2skytJEvNS994
|
|
|
144
145
|
tpu_inference/kernels/megablox/gmm.py,sha256=rVW70SGPshR9XvHiwzmskX4_yeD4nE8or3RfabwcCLM,24240
|
|
145
146
|
tpu_inference/kernels/mla/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
146
147
|
tpu_inference/kernels/mla/v1/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
147
|
-
tpu_inference/kernels/mla/v1/kernel.py,sha256=
|
|
148
|
+
tpu_inference/kernels/mla/v1/kernel.py,sha256=oovjb0x3qz08IL_KVjLLbNbcEcFXip55fqgIgfnl3RA,49758
|
|
148
149
|
tpu_inference/kernels/quantized_matmul/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
149
150
|
tpu_inference/kernels/quantized_matmul/kernel.py,sha256=-A9Kd2ApHWgPvCaUPfjM5JooLz_iCfWV1UT0taaZaAo,16264
|
|
150
151
|
tpu_inference/kernels/quantized_matmul/tuned_block_sizes.py,sha256=3zhIm73JEE8qOty2_0v3AJlVz13k6qMB5wlXBDyC1EM,35130
|
|
@@ -167,7 +168,7 @@ tpu_inference/layers/common/attention_metadata.py,sha256=rmipY517sefHe4owxC5USkm
|
|
|
167
168
|
tpu_inference/layers/common/binary_search.py,sha256=ZQi-z1wG6WTcfVQXeTGOZokX4K1DSf9kCzqfrhEU8lk,12320
|
|
168
169
|
tpu_inference/layers/common/quant_methods.py,sha256=SCm9g7bE02XSMONmOCuT0vfHeTP6RzGQ57aTj919HgM,772
|
|
169
170
|
tpu_inference/layers/common/quantization.py,sha256=cTuoCpU3qBdPvoy_6R6uwCyz9ojh6esvl9x3bQeMbs4,8710
|
|
170
|
-
tpu_inference/layers/common/sharding.py,sha256=
|
|
171
|
+
tpu_inference/layers/common/sharding.py,sha256=curCejZPj8ND4rxjWEbwRozkFYlK_HlpIyTywhDHcWU,26171
|
|
171
172
|
tpu_inference/layers/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
172
173
|
tpu_inference/layers/jax/base.py,sha256=UhT4ut_59ynUPdaZGpMPSCQkPTWXA9BxkaPy7lDhoLI,6350
|
|
173
174
|
tpu_inference/layers/jax/constants.py,sha256=YQJOeAbja1yTbPhoOWMp24OF1RCMwPybK1NIwPrrYJ0,3329
|
|
@@ -179,28 +180,28 @@ tpu_inference/layers/jax/rope_interface.py,sha256=cPqVpKG5_SU7S7xcrMEaPBJLqi1nC4
|
|
|
179
180
|
tpu_inference/layers/jax/transformer_block.py,sha256=HTI0fYPQd23UbnJSB_pL2K3un3q_i3guvJiNCUReVRs,4492
|
|
180
181
|
tpu_inference/layers/jax/attention/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
181
182
|
tpu_inference/layers/jax/attention/attention.py,sha256=_N5W4ox8EzC1CZYcIhsEi35X8WCIMFEBlSzVtDDcTu8,10623
|
|
182
|
-
tpu_inference/layers/jax/attention/deepseek_v3_attention.py,sha256=
|
|
183
|
-
tpu_inference/layers/jax/attention/gpt_oss_attention.py,sha256=
|
|
184
|
-
tpu_inference/layers/jax/attention/llama4_attention.py,sha256=
|
|
183
|
+
tpu_inference/layers/jax/attention/deepseek_v3_attention.py,sha256=KP-hgck-wTzTcwDNB08DwNiqsE-6OD4tQ1jLVwWQvEw,22427
|
|
184
|
+
tpu_inference/layers/jax/attention/gpt_oss_attention.py,sha256=EM1kJpr77VHh95aSD5UnSJazB_anS_7PyaD8TixVMrY,9241
|
|
185
|
+
tpu_inference/layers/jax/attention/llama4_attention.py,sha256=QzBDoEioI9mMdI1T2LNlsr89iaGl234e-9s202YWS8M,6713
|
|
185
186
|
tpu_inference/layers/jax/moe/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
186
187
|
tpu_inference/layers/jax/moe/deepseek_v3_moe.py,sha256=5j6TJO8fAB2Yv6mVAeM2F9WLe4QDM9bf6zxtdKjHjCQ,26456
|
|
187
188
|
tpu_inference/layers/jax/moe/gpt_oss_moe.py,sha256=-uliFqHJFOTT9WJCEpGhkImOXMSoo3aePXMOmKXlgmk,6771
|
|
188
189
|
tpu_inference/layers/jax/moe/moe.py,sha256=E7L8bJucTVke89o048GAbWdtuQIL5oDz-MkW0NK4E00,10114
|
|
189
190
|
tpu_inference/layers/jax/sample/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
190
191
|
tpu_inference/layers/jax/sample/rejection_sampler.py,sha256=VqN0mxi7Xg58w4EXS625ndC8NyA_UZMV9bjFM1mkvrY,21000
|
|
191
|
-
tpu_inference/layers/jax/sample/sampling.py,sha256=
|
|
192
|
+
tpu_inference/layers/jax/sample/sampling.py,sha256=IfJBFSXuTdd0QELn8Opmh7HgdzKreIwGYUOskTFp4aI,3888
|
|
192
193
|
tpu_inference/layers/jax/sample/sampling_metadata.py,sha256=bip7TQcw-VHyN6072zBQY-tA0-QTyJpnuYg04mw9Sv0,3136
|
|
193
194
|
tpu_inference/layers/vllm/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
194
195
|
tpu_inference/layers/vllm/attention.py,sha256=LMQbS2KAup0Q-mmN5pzV6uUs-qdGpTSH8eV6ByHde9g,7370
|
|
195
|
-
tpu_inference/layers/vllm/fused_moe.py,sha256=
|
|
196
|
+
tpu_inference/layers/vllm/fused_moe.py,sha256=NdMVpDLI5-5274EuhVtH8KZzCnLBqSZSSvRoZqzwY7s,19868
|
|
196
197
|
tpu_inference/layers/vllm/linear_common.py,sha256=--jpy8vf0RkZ0jDU1QfXT-V-RnjIloNSodYQKiw4Txo,9129
|
|
197
|
-
tpu_inference/layers/vllm/sharding.py,sha256=
|
|
198
|
+
tpu_inference/layers/vllm/sharding.py,sha256=f3pu7CJNRkfq5j1bmhmTM5wU9HwAePH3yWeTmaIINAw,9926
|
|
198
199
|
tpu_inference/layers/vllm/quantization/__init__.py,sha256=XYe1VwgoFqLTuLJ-i-64hzNNMSWOkoErLTA_4N_Cze0,2463
|
|
199
200
|
tpu_inference/layers/vllm/quantization/awq.py,sha256=nlWwR08lPlE_HIXLoDiGS2pOAJEiI0ukUGXos0NpbAE,9072
|
|
200
|
-
tpu_inference/layers/vllm/quantization/common.py,sha256=
|
|
201
|
+
tpu_inference/layers/vllm/quantization/common.py,sha256=GXYEvnhvRF8VWX0SHkzRpV3_LoQgAwCmXwLbEEwSm8A,5021
|
|
201
202
|
tpu_inference/layers/vllm/quantization/fp8.py,sha256=_NT7QOD-N3UAJnYSDJD24Tsp8FaSK6NuDYp78QOTyzo,4530
|
|
202
|
-
tpu_inference/layers/vllm/quantization/mxfp4.py,sha256=
|
|
203
|
-
tpu_inference/layers/vllm/quantization/unquantized.py,sha256=
|
|
203
|
+
tpu_inference/layers/vllm/quantization/mxfp4.py,sha256=oS7e40ovqmipDKiHfpezzdP2RaFSNeXw6zv2nTrwKvc,18214
|
|
204
|
+
tpu_inference/layers/vllm/quantization/unquantized.py,sha256=LPGRKw3lkeCHCJsY70P_hDUagnmI5bNe1cHuTFUEBkc,16701
|
|
204
205
|
tpu_inference/layers/vllm/quantization/compressed_tensors/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
205
206
|
tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py,sha256=2RS8owCqKHXZbtWKNjdKtsfzKH9N60UyqD-ug1A83oE,5914
|
|
206
207
|
tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=aOme0LFnhVeXDxdSw-Z0k5MZutM3-EoF2vwffezCARE,11277
|
|
@@ -212,49 +213,49 @@ tpu_inference/lora/torch_lora_ops.py,sha256=YR3Hj8nLLiQ-6wXy4uFsjQxFTbJYZ4o5dh_L
|
|
|
212
213
|
tpu_inference/lora/torch_punica_tpu.py,sha256=qTnXZGLoOgvukSxeunO_SfpPTlkq9GlMj9H7zVYg9LE,12680
|
|
213
214
|
tpu_inference/models/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
214
215
|
tpu_inference/models/common/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
215
|
-
tpu_inference/models/common/model_loader.py,sha256=
|
|
216
|
+
tpu_inference/models/common/model_loader.py,sha256=TnTTION_J3nMGsIMJFuMzoBSl3VHHvhretmF5gMkyXI,21679
|
|
216
217
|
tpu_inference/models/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
217
218
|
tpu_inference/models/jax/deepseek_v3.py,sha256=mje3RgxE1NwKWVLgJnPq3ebWB1J8T6YGHT2TtxN10Dg,45031
|
|
218
219
|
tpu_inference/models/jax/gpt_oss.py,sha256=bgdsCx3UcTqEJatWBYbma5HNHH8GEaHN4aL5IsAeSmM,21592
|
|
219
220
|
tpu_inference/models/jax/jax_intermediate_tensor.py,sha256=XKpDgPkOiRtYaPrW76ILxcp2uFfSiE1JMdqHWGo0-Ss,3179
|
|
220
|
-
tpu_inference/models/jax/llama3.py,sha256=
|
|
221
|
+
tpu_inference/models/jax/llama3.py,sha256=FjTGC69V_EJmvb5BIqYu3V5NS1Pvy-5Pb34kMn5YU5U,16317
|
|
221
222
|
tpu_inference/models/jax/llama4.py,sha256=Ssycb5fcGjhJYg8FfcNckVhow7bvVt0FJbbpHinzMAA,30206
|
|
222
223
|
tpu_inference/models/jax/llama_eagle3.py,sha256=_wnljvb8lLCQ0Z3Vuw0QI7F6b41x6I1WuvstZWGvCYE,13051
|
|
223
|
-
tpu_inference/models/jax/llama_guard_4.py,sha256=
|
|
224
|
-
tpu_inference/models/jax/qwen2.py,sha256=
|
|
225
|
-
tpu_inference/models/jax/qwen2_5_vl.py,sha256=
|
|
224
|
+
tpu_inference/models/jax/llama_guard_4.py,sha256=R4wo45s1JsVD39t8JeAItujGoi-sl43HBH95hr7qEVw,15845
|
|
225
|
+
tpu_inference/models/jax/qwen2.py,sha256=bart2yYGv0J-lNbk8Hk5jn5IF6j_Jp8YKSEjwVU_y24,14038
|
|
226
|
+
tpu_inference/models/jax/qwen2_5_vl.py,sha256=3g3tUt7c83fKOdiMzuq2VyldCyeXoCBGrVYfqyIWwGE,50370
|
|
226
227
|
tpu_inference/models/jax/qwen3.py,sha256=jVOOVrBFnxRIZ_Euo90iCga8rORpz0Kqs79uKqsFwEQ,11678
|
|
227
228
|
tpu_inference/models/jax/utils/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
228
229
|
tpu_inference/models/jax/utils/file_utils.py,sha256=8iZcGNvF1N0gNioH8fBlVYTSGYn4fC2WvmlTyeDZyZM,3415
|
|
229
|
-
tpu_inference/models/jax/utils/multi_modal_utils.py,sha256=
|
|
230
|
+
tpu_inference/models/jax/utils/multi_modal_utils.py,sha256=c2LRXdOPi3F779yg2UX-DnuFDxF1JciTcFa09iODxZs,6695
|
|
230
231
|
tpu_inference/models/jax/utils/weight_utils.py,sha256=0xyjGlDSrA09gtb4plw9yX57VPMgn3o5WNl6mXPDU70,23121
|
|
231
232
|
tpu_inference/models/jax/utils/qwix/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
232
|
-
tpu_inference/models/jax/utils/qwix/qwix_utils.py,sha256=
|
|
233
|
+
tpu_inference/models/jax/utils/qwix/qwix_utils.py,sha256=w3wmDb1drJxOK1mVRVMORznqKbtZqFfi7H0Ib_k-iW8,29526
|
|
233
234
|
tpu_inference/models/vllm/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
234
|
-
tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=
|
|
235
|
+
tpu_inference/models/vllm/vllm_model_wrapper.py,sha256=G4ClHbvMY0gPpTOFWStb1mEVVMzIc3-wz1KXC-mDpj8,13023
|
|
235
236
|
tpu_inference/models/vllm/vllm_model_wrapper_context.py,sha256=vsXQnC2aZ_mHKb-7d9UeN28lfawfApNTm5asUMgEhgo,1762
|
|
236
237
|
tpu_inference/platforms/__init__.py,sha256=BK6rwAhiqVSAUJ9m9EehSKetA6hEPe92flD9Ei076WQ,649
|
|
237
|
-
tpu_inference/platforms/tpu_platform.py,sha256=
|
|
238
|
+
tpu_inference/platforms/tpu_platform.py,sha256=bGTH1k0GI5AB_He5IghJtPwuyrhceUQ-rHs41fMpwhI,9826
|
|
238
239
|
tpu_inference/runner/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
239
240
|
tpu_inference/runner/block_table.py,sha256=K3Ic8EgPM08d_C5nEN60mxoRydlaQWySAemf_8Q_qVw,4175
|
|
240
|
-
tpu_inference/runner/compilation_manager.py,sha256=
|
|
241
|
+
tpu_inference/runner/compilation_manager.py,sha256=BFjOzJUyEJTmUZAvGCm3yeqoY7Kkw2JKc_A3CzRoN7o,42112
|
|
241
242
|
tpu_inference/runner/input_batch.py,sha256=bx221NX2IOWzrtopss-B-2ZKW4y-U6nQpG09PjpUziw,18273
|
|
242
243
|
tpu_inference/runner/kv_cache.py,sha256=xpB6VTrT3lIq5JNNPJTVEnHFgehIzgxKNIHxxXIxwKI,6046
|
|
243
|
-
tpu_inference/runner/kv_cache_manager.py,sha256=
|
|
244
|
+
tpu_inference/runner/kv_cache_manager.py,sha256=u6pXaWPzmPe34lXiy-acAdGBmp9WEQrGvksyBfGBRdM,23342
|
|
244
245
|
tpu_inference/runner/lora_utils.py,sha256=DGV_8aMrqb6Q4v7eC0UvipsM-6XQSt1afiZGKTKd6sc,4418
|
|
245
|
-
tpu_inference/runner/multimodal_manager.py,sha256=
|
|
246
|
+
tpu_inference/runner/multimodal_manager.py,sha256=dQm0sQ9nGHaWRS8rVPDBZP4P6jNFcJPufnAxv8DoWYs,10344
|
|
246
247
|
tpu_inference/runner/persistent_batch_manager.py,sha256=aCeTyqCgBnQy_6hXjiNLtF81ekG0-YwlQiWeJhx-pdM,13838
|
|
247
248
|
tpu_inference/runner/speculative_decoding_manager.py,sha256=-eSxTIGXbRWRZjHJfikb7kfqbtr_cj7Pca9zInWSn1w,10790
|
|
248
249
|
tpu_inference/runner/structured_decoding_manager.py,sha256=sj1fPrit0qdhcQtDbue5kpxos7zL16_dZQ5YSXTDbzg,4148
|
|
249
|
-
tpu_inference/runner/tpu_runner.py,sha256=
|
|
250
|
+
tpu_inference/runner/tpu_runner.py,sha256=cgIyZiI3UjpvPWhNRL-mCSnssbbDNt00g5idAzwgWR0,80736
|
|
250
251
|
tpu_inference/runner/utils.py,sha256=lKqL5nxGTk7ufzJRNdp4udn2bPu3jIX52W7akXgSrHc,17133
|
|
251
252
|
tpu_inference/spec_decode/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
252
253
|
tpu_inference/spec_decode/jax/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
253
254
|
tpu_inference/spec_decode/jax/eagle3.py,sha256=5WtEbkgzXpmFz374ibQD5IIcRro4d0SNeCYgBv2nM1c,19678
|
|
254
255
|
tpu_inference/worker/__init__.py,sha256=Q9FlRO2IfSE9yEaiAYzWkOMBJPCaNYqh4ihcp0t0BQs,574
|
|
255
|
-
tpu_inference/worker/tpu_worker.py,sha256=
|
|
256
|
-
tpu_inference-0.12.0.
|
|
257
|
-
tpu_inference-0.12.0.
|
|
258
|
-
tpu_inference-0.12.0.
|
|
259
|
-
tpu_inference-0.12.0.
|
|
260
|
-
tpu_inference-0.12.0.
|
|
256
|
+
tpu_inference/worker/tpu_worker.py,sha256=ntwCibPyiw-z8aMUdtu8usqU_q2b0u7diWNOmpjG_6o,21651
|
|
257
|
+
tpu_inference-0.12.0.dev20251224.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
258
|
+
tpu_inference-0.12.0.dev20251224.dist-info/METADATA,sha256=gVLZ-35W1Nw3z2LnxeFYsNQHMRtTM7aUIAuWbxucsBg,5767
|
|
259
|
+
tpu_inference-0.12.0.dev20251224.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
260
|
+
tpu_inference-0.12.0.dev20251224.dist-info/top_level.txt,sha256=gb1hRIQ3DOawUfVzvPL2E__2KPIl9I0vb5r0xcRBGYQ,20
|
|
261
|
+
tpu_inference-0.12.0.dev20251224.dist-info/RECORD,,
|
{tpu_inference-0.12.0.dev20251222.dist-info → tpu_inference-0.12.0.dev20251224.dist-info}/WHEEL
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|