vllm-npu 0.4.2__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (219) hide show
  1. vllm/__init__.py +23 -0
  2. vllm/_custom_ops.py +251 -0
  3. vllm/attention/__init__.py +13 -0
  4. vllm/attention/backends/__init__.py +0 -0
  5. vllm/attention/backends/abstract.py +127 -0
  6. vllm/attention/backends/flash_attn.py +271 -0
  7. vllm/attention/backends/flashinfer.py +220 -0
  8. vllm/attention/backends/rocm_flash_attn.py +374 -0
  9. vllm/attention/backends/torch_sdpa.py +250 -0
  10. vllm/attention/backends/xformers.py +393 -0
  11. vllm/attention/layer.py +56 -0
  12. vllm/attention/ops/__init__.py +0 -0
  13. vllm/attention/ops/paged_attn.py +216 -0
  14. vllm/attention/ops/prefix_prefill.py +792 -0
  15. vllm/attention/ops/triton_flash_attention.py +810 -0
  16. vllm/attention/selector.py +91 -0
  17. vllm/block.py +84 -0
  18. vllm/config.py +1225 -0
  19. vllm/core/__init__.py +0 -0
  20. vllm/core/block/__init__.py +0 -0
  21. vllm/core/block/block_table.py +295 -0
  22. vllm/core/block/common.py +199 -0
  23. vllm/core/block/cpu_gpu_block_allocator.py +228 -0
  24. vllm/core/block/interfaces.py +205 -0
  25. vllm/core/block/naive_block.py +318 -0
  26. vllm/core/block/prefix_caching_block.py +606 -0
  27. vllm/core/block_manager_v1.py +625 -0
  28. vllm/core/block_manager_v2.py +258 -0
  29. vllm/core/evictor_v1.py +105 -0
  30. vllm/core/evictor_v2.py +127 -0
  31. vllm/core/interfaces.py +113 -0
  32. vllm/core/policy.py +45 -0
  33. vllm/core/scheduler.py +1163 -0
  34. vllm/distributed/__init__.py +3 -0
  35. vllm/distributed/communication_op.py +237 -0
  36. vllm/distributed/device_communicators/__init__.py +0 -0
  37. vllm/distributed/device_communicators/custom_all_reduce.py +274 -0
  38. vllm/distributed/device_communicators/pynccl.py +287 -0
  39. vllm/distributed/device_communicators/pynccl_utils.py +66 -0
  40. vllm/distributed/parallel_state.py +339 -0
  41. vllm/distributed/utils.py +136 -0
  42. vllm/engine/__init__.py +0 -0
  43. vllm/engine/arg_utils.py +649 -0
  44. vllm/engine/async_llm_engine.py +737 -0
  45. vllm/engine/llm_engine.py +784 -0
  46. vllm/engine/metrics.py +368 -0
  47. vllm/engine/output_processor/__init__.py +0 -0
  48. vllm/engine/output_processor/interfaces.py +76 -0
  49. vllm/engine/output_processor/multi_step.py +142 -0
  50. vllm/engine/output_processor/single_step.py +284 -0
  51. vllm/engine/output_processor/stop_checker.py +101 -0
  52. vllm/engine/output_processor/util.py +19 -0
  53. vllm/entrypoints/__init__.py +0 -0
  54. vllm/entrypoints/api_server.py +119 -0
  55. vllm/entrypoints/llm.py +259 -0
  56. vllm/entrypoints/openai/__init__.py +0 -0
  57. vllm/entrypoints/openai/api_server.py +186 -0
  58. vllm/entrypoints/openai/cli_args.py +115 -0
  59. vllm/entrypoints/openai/protocol.py +460 -0
  60. vllm/entrypoints/openai/serving_chat.py +392 -0
  61. vllm/entrypoints/openai/serving_completion.py +347 -0
  62. vllm/entrypoints/openai/serving_engine.py +234 -0
  63. vllm/envs.py +217 -0
  64. vllm/executor/__init__.py +0 -0
  65. vllm/executor/cpu_executor.py +152 -0
  66. vllm/executor/distributed_gpu_executor.py +115 -0
  67. vllm/executor/executor_base.py +115 -0
  68. vllm/executor/gpu_executor.py +150 -0
  69. vllm/executor/multiproc_worker_utils.py +263 -0
  70. vllm/executor/neuron_executor.py +91 -0
  71. vllm/executor/ray_gpu_executor.py +327 -0
  72. vllm/executor/ray_utils.py +119 -0
  73. vllm/logger.py +153 -0
  74. vllm/logging/__init__.py +5 -0
  75. vllm/logging/formatter.py +15 -0
  76. vllm/lora/__init__.py +0 -0
  77. vllm/lora/fully_sharded_layers.py +262 -0
  78. vllm/lora/layers.py +1181 -0
  79. vllm/lora/lora.py +167 -0
  80. vllm/lora/models.py +645 -0
  81. vllm/lora/punica.py +213 -0
  82. vllm/lora/request.py +32 -0
  83. vllm/lora/utils.py +98 -0
  84. vllm/lora/worker_manager.py +251 -0
  85. vllm/model_executor/__init__.py +7 -0
  86. vllm/model_executor/guided_decoding/__init__.py +25 -0
  87. vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py +70 -0
  88. vllm/model_executor/guided_decoding/outlines_decoding.py +130 -0
  89. vllm/model_executor/guided_decoding/outlines_logits_processors.py +184 -0
  90. vllm/model_executor/layers/__init__.py +0 -0
  91. vllm/model_executor/layers/activation.py +173 -0
  92. vllm/model_executor/layers/fused_moe/__init__.py +7 -0
  93. vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json +146 -0
  94. vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  95. vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  96. vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  97. vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  98. vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json +146 -0
  99. vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  100. vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  101. vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  102. vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  103. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json +146 -0
  104. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  105. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json +140 -0
  106. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  107. vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  108. vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  109. vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  110. vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json +146 -0
  111. vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  112. vllm/model_executor/layers/fused_moe/fused_moe.py +479 -0
  113. vllm/model_executor/layers/layernorm.py +71 -0
  114. vllm/model_executor/layers/linear.py +709 -0
  115. vllm/model_executor/layers/logits_processor.py +115 -0
  116. vllm/model_executor/layers/ops/__init__.py +0 -0
  117. vllm/model_executor/layers/ops/rand.py +157 -0
  118. vllm/model_executor/layers/ops/sample.py +406 -0
  119. vllm/model_executor/layers/quantization/__init__.py +35 -0
  120. vllm/model_executor/layers/quantization/aqlm.py +376 -0
  121. vllm/model_executor/layers/quantization/awq.py +175 -0
  122. vllm/model_executor/layers/quantization/base_config.py +97 -0
  123. vllm/model_executor/layers/quantization/fp8.py +265 -0
  124. vllm/model_executor/layers/quantization/gptq.py +224 -0
  125. vllm/model_executor/layers/quantization/gptq_marlin.py +438 -0
  126. vllm/model_executor/layers/quantization/marlin.py +227 -0
  127. vllm/model_executor/layers/quantization/schema.py +84 -0
  128. vllm/model_executor/layers/quantization/squeezellm.py +137 -0
  129. vllm/model_executor/layers/rejection_sampler.py +405 -0
  130. vllm/model_executor/layers/rotary_embedding.py +525 -0
  131. vllm/model_executor/layers/sampler.py +1051 -0
  132. vllm/model_executor/layers/vocab_parallel_embedding.py +155 -0
  133. vllm/model_executor/model_loader/__init__.py +30 -0
  134. vllm/model_executor/model_loader/loader.py +362 -0
  135. vllm/model_executor/model_loader/neuron.py +136 -0
  136. vllm/model_executor/model_loader/tensorizer.py +368 -0
  137. vllm/model_executor/model_loader/utils.py +41 -0
  138. vllm/model_executor/model_loader/weight_utils.py +372 -0
  139. vllm/model_executor/models/__init__.py +119 -0
  140. vllm/model_executor/models/baichuan.py +410 -0
  141. vllm/model_executor/models/bloom.py +327 -0
  142. vllm/model_executor/models/chatglm.py +386 -0
  143. vllm/model_executor/models/commandr.py +373 -0
  144. vllm/model_executor/models/dbrx.py +413 -0
  145. vllm/model_executor/models/decilm.py +122 -0
  146. vllm/model_executor/models/deepseek.py +438 -0
  147. vllm/model_executor/models/falcon.py +444 -0
  148. vllm/model_executor/models/gemma.py +393 -0
  149. vllm/model_executor/models/gpt2.py +266 -0
  150. vllm/model_executor/models/gpt_bigcode.py +274 -0
  151. vllm/model_executor/models/gpt_j.py +281 -0
  152. vllm/model_executor/models/gpt_neox.py +295 -0
  153. vllm/model_executor/models/internlm2.py +323 -0
  154. vllm/model_executor/models/jais.py +333 -0
  155. vllm/model_executor/models/llama.py +442 -0
  156. vllm/model_executor/models/llava.py +239 -0
  157. vllm/model_executor/models/minicpm.py +531 -0
  158. vllm/model_executor/models/mixtral.py +583 -0
  159. vllm/model_executor/models/mixtral_quant.py +404 -0
  160. vllm/model_executor/models/mpt.py +295 -0
  161. vllm/model_executor/models/olmo.py +356 -0
  162. vllm/model_executor/models/opt.py +349 -0
  163. vllm/model_executor/models/orion.py +319 -0
  164. vllm/model_executor/models/phi.py +300 -0
  165. vllm/model_executor/models/qwen.py +284 -0
  166. vllm/model_executor/models/qwen2.py +367 -0
  167. vllm/model_executor/models/qwen2_moe.py +447 -0
  168. vllm/model_executor/models/stablelm.py +301 -0
  169. vllm/model_executor/models/starcoder2.py +302 -0
  170. vllm/model_executor/models/xverse.py +366 -0
  171. vllm/model_executor/sampling_metadata.py +588 -0
  172. vllm/model_executor/utils.py +35 -0
  173. vllm/outputs.py +150 -0
  174. vllm/py.typed +2 -0
  175. vllm/sampling_params.py +340 -0
  176. vllm/sequence.py +766 -0
  177. vllm/spec_decode/__init__.py +0 -0
  178. vllm/spec_decode/batch_expansion.py +397 -0
  179. vllm/spec_decode/interfaces.py +73 -0
  180. vllm/spec_decode/metrics.py +191 -0
  181. vllm/spec_decode/multi_step_worker.py +203 -0
  182. vllm/spec_decode/ngram_worker.py +176 -0
  183. vllm/spec_decode/spec_decode_worker.py +472 -0
  184. vllm/spec_decode/top1_proposer.py +200 -0
  185. vllm/spec_decode/util.py +228 -0
  186. vllm/test_utils.py +41 -0
  187. vllm/transformers_utils/__init__.py +0 -0
  188. vllm/transformers_utils/config.py +58 -0
  189. vllm/transformers_utils/configs/__init__.py +16 -0
  190. vllm/transformers_utils/configs/chatglm.py +68 -0
  191. vllm/transformers_utils/configs/dbrx.py +278 -0
  192. vllm/transformers_utils/configs/falcon.py +87 -0
  193. vllm/transformers_utils/configs/jais.py +236 -0
  194. vllm/transformers_utils/configs/mpt.py +178 -0
  195. vllm/transformers_utils/detokenizer.py +313 -0
  196. vllm/transformers_utils/tokenizer.py +149 -0
  197. vllm/transformers_utils/tokenizer_group/__init__.py +33 -0
  198. vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py +55 -0
  199. vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py +169 -0
  200. vllm/transformers_utils/tokenizer_group/tokenizer_group.py +78 -0
  201. vllm/transformers_utils/tokenizers/__init__.py +5 -0
  202. vllm/transformers_utils/tokenizers/baichuan.py +255 -0
  203. vllm/usage/__init__.py +0 -0
  204. vllm/usage/usage_lib.py +209 -0
  205. vllm/utils.py +677 -0
  206. vllm/worker/__init__.py +0 -0
  207. vllm/worker/cache_engine.py +105 -0
  208. vllm/worker/cpu_model_runner.py +346 -0
  209. vllm/worker/cpu_worker.py +321 -0
  210. vllm/worker/model_runner.py +1168 -0
  211. vllm/worker/neuron_model_runner.py +196 -0
  212. vllm/worker/neuron_worker.py +98 -0
  213. vllm/worker/worker.py +345 -0
  214. vllm/worker/worker_base.py +146 -0
  215. vllm_npu-0.4.2.dist-info/LICENSE +201 -0
  216. vllm_npu-0.4.2.dist-info/METADATA +173 -0
  217. vllm_npu-0.4.2.dist-info/RECORD +219 -0
  218. vllm_npu-0.4.2.dist-info/WHEEL +5 -0
  219. vllm_npu-0.4.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,318 @@
1
+ from typing import Dict, FrozenSet, Iterable, List, Optional, Set
2
+
3
+ from vllm.core.block.common import (CopyOnWriteTracker, RefCounter,
4
+ get_all_blocks_recursively)
5
+ from vllm.core.block.interfaces import Block, BlockAllocator, BlockId, Device
6
+
7
+ Refcount = int  # Type alias: a reference count is represented as a plain int.
8
+
9
+
10
class NaiveBlockAllocator(BlockAllocator):
    """A simple block allocator that manages blocks of memory without prefix
    caching.

    Args:
        create_block (Block.Factory): A factory function for creating new
            blocks. This is used when a NaiveBlockAllocator is composed within
            a prefix caching allocator -- the naive block allocator must
            construct prefix caching blocks (but shouldn't know anything else
            about them).
        num_blocks (int): The total number of blocks to manage.
        block_size (int): The size of each block in tokens.
        block_ids (Optional[Iterable[int]], optional): An optional iterable of
            block IDs. If not provided, block IDs will be assigned sequentially
            from 0 to num_blocks - 1. The iterable is consumed exactly once,
            so a single-pass iterator (e.g. a generator) is accepted. It must
            contain exactly ``num_blocks`` distinct IDs.
    """

    def __init__(
        self,
        create_block: Block.Factory,
        num_blocks: int,
        block_size: int,
        block_ids: Optional[Iterable[int]] = None,
    ):
        if block_ids is None:
            block_ids = range(num_blocks)

        # Materialize the IDs once. `block_ids` may be a one-shot iterator;
        # iterating it a second time would yield nothing and leave the two
        # sets below inconsistent with each other.
        self._all_block_indices: FrozenSet[BlockId] = frozenset(block_ids)
        assert len(self._all_block_indices) == num_blocks

        # Every block starts out free.
        self._free_block_indices: Set[BlockId] = set(self._all_block_indices)

        self._refcounter = RefCounter(
            all_block_indices=self._free_block_indices)
        self._create_block = create_block
        self._block_size = block_size

        self._cow_tracker = CopyOnWriteTracker(
            refcounter=self._refcounter.as_readonly(),
            allocator=self,
        )

    def allocate_immutable(self,
                           prev_block: Optional[Block],
                           token_ids: List[int],
                           device: Optional[Device] = None) -> Block:
        """Allocates a new immutable block with the given token IDs, linked to
        the previous block.

        Args:
            prev_block (Optional[Block]): The previous block in the sequence. If
                None, then the block to be allocated is the first block in the
                sequence.
            token_ids (List[int]): The token IDs to be stored in the new block.
            device (Optional[Device]): Must be None; any other value trips an
                assertion.

        Returns:
            Block: The newly allocated immutable block.
        """
        assert device is None
        block = self.allocate_mutable(prev_block=prev_block)
        block.append_token_ids(token_ids)
        return block

    def allocate_mutable(self,
                         prev_block: Optional[Block],
                         device: Optional[Device] = None) -> Block:
        """Allocates a new mutable block, linked to the previous block.

        Args:
            prev_block (Optional[Block]): The previous block in the sequence. If
                None, then the block to be allocated is the first block in the
                sequence.
            device (Optional[Device]): Must be None; any other value trips an
                assertion.

        Returns:
            Block: The newly allocated mutable block.

        Raises:
            BlockAllocator.NoFreeBlocksError: If no free block ID is left.
        """
        assert device is None
        block_id = self._allocate_new_block_id()
        return self._create_block(
            prev_block=prev_block,
            token_ids=[],
            block_id=block_id,
            block_size=self._block_size,
            allocator=self,
        )

    def free(self, block: Block) -> None:
        """Releases this allocator's reference held through ``block``.

        The block's ID is handed to ``_free_block_id`` and the block object is
        then detached from it by setting ``block.block_id`` to None.

        Args:
            block (Block): A block that currently has a block ID assigned.
        """
        assert block.block_id is not None
        self._free_block_id(block.block_id)

        # Mark the block as having no allocation.
        block.block_id = None

    def fork(self, last_block: Block) -> List[Block]:
        """Creates a new sequence of blocks that shares the same underlying
        memory as the original sequence.

        Args:
            last_block (Block): The last block in the original sequence.

        Returns:
            List[Block]: The new sequence of blocks that shares the same memory
                as the original sequence.
        """
        source_blocks = get_all_blocks_recursively(last_block)

        forked_blocks = []
        prev_block = None
        for block in source_blocks:

            # Increment refcount for each block.
            assert block.block_id is not None
            refcount = self._refcounter.incr(block.block_id)
            # A count of exactly 1 after incrementing is treated as "this
            # block had no owner before the fork".
            assert refcount != 1, "can't fork free'd block"

            # The forked block reuses the source block's ID and is chained to
            # the previously forked block, not to the source chain.
            forked_blocks.append(
                self._create_block(
                    prev_block=prev_block,
                    token_ids=block.token_ids,
                    block_id=block.block_id,
                    block_size=self._block_size,
                    allocator=self,
                ))
            prev_block = forked_blocks[-1]

        return forked_blocks

    def get_num_free_blocks(self) -> int:
        """Returns the number of block IDs currently in the free set."""
        return len(self._free_block_indices)

    def get_num_total_blocks(self) -> int:
        """Returns the total number of block IDs managed by this allocator."""
        return len(self._all_block_indices)

    def _allocate_new_block_id(self) -> BlockId:
        """Takes one ID out of the free set and bumps its refcount.

        Raises:
            BlockAllocator.NoFreeBlocksError: If the free set is empty.
        """
        if not self._free_block_indices:
            raise BlockAllocator.NoFreeBlocksError()

        block_id = next(iter(self._free_block_indices))
        self._refcounter.incr(block_id)
        self._free_block_indices.remove(block_id)
        return block_id

    def _free_block_id(self, block_id: BlockId) -> None:
        """Decrements the refcount of ``block_id``; when the value returned by
        the refcounter is zero, the ID goes back into the free set.
        """
        refcount = self._refcounter.decr(block_id)
        if refcount == 0:
            self._free_block_indices.add(block_id)

    @property
    def refcounter(self):
        """The RefCounter instance owned by this allocator."""
        return self._refcounter

    @property
    def all_block_ids(self) -> FrozenSet[int]:
        """The immutable set of every block ID managed by this allocator."""
        return self._all_block_indices

    def cow_block_if_not_appendable(self, block: Block) -> Optional[BlockId]:
        """Performs a copy-on-write operation on the given block if it is not
        appendable.

        The decision and bookkeeping are delegated to the allocator's
        CopyOnWriteTracker.

        Args:
            block (Block): The block to check for copy-on-write.

        Returns:
            Optional[BlockId]: The block index of the new block if a copy-on
                -write operation was performed, or the original block index if
                no copy-on-write was necessary.
        """
        return self._cow_tracker.cow_block_if_not_appendable(block)

    def clear_copy_on_writes(self) -> Dict[BlockId, List[BlockId]]:
        """Returns the copy-on-write source->destination mapping and clears it.

        Returns:
            Dict[BlockId, List[BlockId]]: A dictionary mapping source
                block indices to lists of destination block indices.
        """
        return self._cow_tracker.clear_cows()

    def mark_blocks_as_accessed(self, block_ids: List[int],
                                now: float) -> None:
        """Mark blocks as accessed, used in prefix caching.

        Since the naive allocator does not implement prefix caching, we do
        nothing.
        """

    def mark_blocks_as_computed(self, block_ids: List[int]) -> None:
        """Mark blocks as computed, used in prefix caching.

        Since the naive allocator does not implement prefix caching, we do
        nothing.
        """

    def get_common_computed_block_ids(
            self, seq_block_ids: List[List[int]]) -> List[int]:
        """Determine blocks that can be skipped in prefill.

        Since the naive allocator does not support prefix caching, always return
        an empty list.
        """
        return []

    def promote_to_immutable_block(self, block: Block) -> BlockId:
        """Not supported by the naive allocator.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError
215
+
216
+
217
class NaiveBlock(Block):
    """A Block implementation with a fixed token capacity and no prefix
    caching support.

    Token IDs can be appended until the block holds ``block_size`` of them.
    When the block has a physical block ID, each append asks the allocator
    whether a copy-on-write is needed and adopts the block ID it returns.

    Args:
        prev_block (Block): The previous block in the sequence.
        token_ids (List[int]): The initial token IDs to be stored in the block.
        block_size (int): The maximum number of token IDs that can be stored in
            the block.
        allocator (BlockAllocator): The block allocator associated with this
            block.
        block_id (Optional[int], optional): The physical block index
            of this block. Defaults to None, which means no allocation has been
            made.
        _cow_target (Optional[Block], optional): The copy-on-write target block.
            If not provided, it defaults to self.
    """

    def __init__(self,
                 prev_block: Optional[Block],
                 token_ids: List[int],
                 block_size: int,
                 allocator: BlockAllocator,
                 block_id: Optional[int] = None,
                 _cow_target: Optional[Block] = None):
        self._allocator = allocator
        self._block_id = block_id
        self._prev_block = prev_block
        self._block_size = block_size
        self._token_ids: List[int] = []

        # Without an explicit target, copy-on-write checks are made against
        # this block itself.
        if _cow_target is None:
            _cow_target = self
        self._cow_target = _cow_target

        # Seed the block with its initial contents; no copy-on-write check is
        # made during construction.
        self._append_token_ids_no_cow(token_ids)

    def append_token_ids(self, token_ids: List[int]) -> None:
        """Stores additional token IDs in the block and, if the block has a
        physical block ID, lets the allocator perform a copy-on-write when it
        deems one necessary.

        Args:
            token_ids (List[int]): The token IDs to be appended to the block.
        """
        self._append_token_ids_no_cow(token_ids)

        # A block without a physical allocation has nothing to copy.
        if self._block_id is None:
            return

        resulting_id = self._allocator.cow_block_if_not_appendable(
            self._cow_target)
        self._block_id = resulting_id

    def _append_token_ids_no_cow(self, token_ids: List[int]) -> None:
        """Extends the stored token IDs without consulting the allocator.

        Asserts that the new tokens fit into the remaining slots.
        """
        assert len(token_ids) <= self.num_empty_slots
        self._token_ids.extend(token_ids)

    @property
    def computed(self) -> bool:
        """Not supported by NaiveBlock; always raises NotImplementedError."""
        raise NotImplementedError

    @computed.setter
    def computed(self, value) -> None:
        raise NotImplementedError

    @property
    def last_accessed(self) -> float:
        """Not supported by NaiveBlock; always raises NotImplementedError."""
        raise NotImplementedError

    @last_accessed.setter
    def last_accessed(self, last_accessed_ts: float):
        raise NotImplementedError

    @property
    def block_id(self) -> Optional[int]:
        """The physical block index, or None when nothing is allocated."""
        return self._block_id

    @block_id.setter
    def block_id(self, value: Optional[int]) -> None:
        self._block_id = value

    @property
    def is_full(self) -> bool:
        """True when no empty slot remains in the block."""
        return not self.num_empty_slots

    @property
    def num_empty_slots(self) -> int:
        """Capacity of the block minus the number of token IDs stored."""
        capacity = self._block_size
        used = len(self._token_ids)
        return capacity - used

    @property
    def token_ids(self) -> List[int]:
        """The list of token IDs currently stored in the block."""
        return self._token_ids

    @property
    def block_size(self) -> int:
        """The maximum number of token IDs the block can hold."""
        return self._block_size

    @property
    def prev_block(self) -> Optional["Block"]:
        """The preceding block in the sequence, or None for the first one."""
        return self._prev_block

    @property
    def content_hash(self) -> Optional[int]:
        """Always None for NaiveBlock."""
        return None