tpu-inference 0.12.0.dev20251222__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260) hide show
  1. tests/__init__.py +13 -0
  2. tests/core/__init__.py +13 -0
  3. tests/core/test_core_tpu.py +513 -0
  4. tests/core/test_disagg_executor.py +60 -0
  5. tests/core/test_disagg_utils.py +67 -0
  6. tests/core/test_dp_scheduler.py +724 -0
  7. tests/core/test_init.py +63 -0
  8. tests/distributed/__init__.py +13 -0
  9. tests/distributed/test_distributed_utils.py +120 -0
  10. tests/distributed/test_tpu_connector.py +478 -0
  11. tests/e2e/__init__.py +13 -0
  12. tests/e2e/test_async_scheduler.py +211 -0
  13. tests/e2e/test_data_parallel.py +393 -0
  14. tests/e2e/test_local_disagg.py +257 -0
  15. tests/e2e/test_model_loader.py +268 -0
  16. tests/e2e/test_multi_modal_inference.py +111 -0
  17. tests/e2e/test_pipeline_parallel.py +265 -0
  18. tests/e2e/test_runai_model_streamer_loader.py +104 -0
  19. tests/e2e/test_sampling_params.py +269 -0
  20. tests/e2e/test_speculative_decoding.py +291 -0
  21. tests/e2e/test_structured_decoding.py +46 -0
  22. tests/executors/__init__.py +13 -0
  23. tests/executors/test_ray_distributed_executor.py +199 -0
  24. tests/experimental/__init__.py +13 -0
  25. tests/experimental/test_llama3_jax_stashed.py +208 -0
  26. tests/kernels/__init__.py +13 -0
  27. tests/kernels/collectives/__init__.py +13 -0
  28. tests/kernels/collectives/all_gather_matmul_kernel_test.py +69 -0
  29. tests/kernels/fused_moe_v1_test.py +388 -0
  30. tests/kernels/gmm_test.py +205 -0
  31. tests/kernels/mla_v1_test.py +498 -0
  32. tests/kernels/quantized_matmul_kernel_test.py +159 -0
  33. tests/kernels/ragged_kv_cache_update_v2_test.py +248 -0
  34. tests/kernels/ragged_paged_attention_kernel_v2_test.py +414 -0
  35. tests/kernels/ragged_paged_attention_kernel_v3_hd64_test.py +565 -0
  36. tests/kernels/ragged_paged_attention_kernel_v3_test.py +520 -0
  37. tests/layers/__init__.py +13 -0
  38. tests/layers/common/__init__.py +13 -0
  39. tests/layers/common/test_attention_interface.py +156 -0
  40. tests/layers/common/test_quantization.py +149 -0
  41. tests/layers/jax/__init__.py +13 -0
  42. tests/layers/jax/attention/__init__.py +13 -0
  43. tests/layers/jax/attention/test_common_attention.py +103 -0
  44. tests/layers/jax/attention/test_deepseek_v3_attention.py +233 -0
  45. tests/layers/jax/attention/test_llama4_attention.py +135 -0
  46. tests/layers/jax/moe/__init__.py +13 -0
  47. tests/layers/jax/moe/test_deepseek_moe.py +235 -0
  48. tests/layers/jax/sample/__init__.py +13 -0
  49. tests/layers/jax/sample/test_rejection_sampler.py +1624 -0
  50. tests/layers/jax/sample/test_sampling.py +115 -0
  51. tests/layers/jax/sample/test_sampling_metadata.py +254 -0
  52. tests/layers/jax/test_layers.py +155 -0
  53. tests/layers/jax/test_qwix.py +969 -0
  54. tests/layers/jax/test_rope.py +93 -0
  55. tests/layers/jax/test_sharding.py +159 -0
  56. tests/layers/jax/test_transformer_block.py +152 -0
  57. tests/layers/vllm/__init__.py +13 -0
  58. tests/layers/vllm/test_attention.py +363 -0
  59. tests/layers/vllm/test_awq.py +405 -0
  60. tests/layers/vllm/test_compressed_tensors_moe.py +202 -0
  61. tests/layers/vllm/test_compressed_tensors_w8a8_fp8.py +403 -0
  62. tests/layers/vllm/test_compressed_tensors_w8a8_int8.py +426 -0
  63. tests/layers/vllm/test_fp8.py +17 -0
  64. tests/layers/vllm/test_mxfp4.py +297 -0
  65. tests/layers/vllm/test_unquantized.py +621 -0
  66. tests/layers/vllm/utils.py +72 -0
  67. tests/lora/__init__.py +13 -0
  68. tests/lora/conftest.py +46 -0
  69. tests/lora/test_bgmv.py +57 -0
  70. tests/lora/test_layers.py +666 -0
  71. tests/lora/test_lora.py +147 -0
  72. tests/lora/test_lora_perf.py +67 -0
  73. tests/lora/utils.py +88 -0
  74. tests/models/__init__.py +13 -0
  75. tests/models/common/__init__.py +13 -0
  76. tests/models/common/test_model_loader.py +455 -0
  77. tests/models/jax/__init__.py +13 -0
  78. tests/models/jax/test_deepseek_v3.py +401 -0
  79. tests/models/jax/test_llama3.py +184 -0
  80. tests/models/jax/test_llama4.py +298 -0
  81. tests/models/jax/test_llama_eagle3.py +197 -0
  82. tests/models/jax/test_llama_guard_4.py +242 -0
  83. tests/models/jax/test_qwen2.py +172 -0
  84. tests/models/jax/test_qwen2_5_vl.py +606 -0
  85. tests/models/jax/test_qwen3.py +169 -0
  86. tests/models/jax/test_weight_loading.py +180 -0
  87. tests/models/jax/utils/__init__.py +13 -0
  88. tests/models/jax/utils/test_multi_modal_utils.py +212 -0
  89. tests/platforms/__init__.py +13 -0
  90. tests/platforms/test_tpu_platform.py +54 -0
  91. tests/runner/__init__.py +13 -0
  92. tests/runner/test_block_table.py +395 -0
  93. tests/runner/test_input_batch.py +226 -0
  94. tests/runner/test_kv_cache.py +220 -0
  95. tests/runner/test_kv_cache_manager.py +498 -0
  96. tests/runner/test_multimodal_manager.py +429 -0
  97. tests/runner/test_persistent_batch_manager.py +84 -0
  98. tests/runner/test_speculative_decoding_manager.py +368 -0
  99. tests/runner/test_structured_decoding_manager.py +220 -0
  100. tests/runner/test_tpu_runner.py +202 -0
  101. tests/runner/test_tpu_runner_dp.py +1033 -0
  102. tests/runner/test_tpu_runner_mesh.py +200 -0
  103. tests/runner/test_utils.py +411 -0
  104. tests/spec_decode/__init__.py +13 -0
  105. tests/spec_decode/test_eagle3.py +311 -0
  106. tests/test_base.py +215 -0
  107. tests/test_envs.py +280 -0
  108. tests/test_tpu_info.py +134 -0
  109. tests/test_utils.py +193 -0
  110. tests/worker/__init__.py +13 -0
  111. tests/worker/tpu_worker_test.py +414 -0
  112. tpu_inference/__init__.py +67 -0
  113. tpu_inference/core/__init__.py +13 -0
  114. tpu_inference/core/core_tpu.py +786 -0
  115. tpu_inference/core/disagg_executor.py +118 -0
  116. tpu_inference/core/disagg_utils.py +49 -0
  117. tpu_inference/core/sched/__init__.py +13 -0
  118. tpu_inference/core/sched/dp_scheduler.py +814 -0
  119. tpu_inference/distributed/__init__.py +13 -0
  120. tpu_inference/distributed/jax_parallel_state.py +81 -0
  121. tpu_inference/distributed/tpu_connector.py +732 -0
  122. tpu_inference/distributed/utils.py +112 -0
  123. tpu_inference/env_override.py +9 -0
  124. tpu_inference/envs.py +191 -0
  125. tpu_inference/executors/__init__.py +13 -0
  126. tpu_inference/executors/ray_distributed_executor.py +399 -0
  127. tpu_inference/experimental/__init__.py +13 -0
  128. tpu_inference/experimental/llama3_jax_stashed.py +272 -0
  129. tpu_inference/kernels/__init__.py +13 -0
  130. tpu_inference/kernels/collectives/__init__.py +13 -0
  131. tpu_inference/kernels/collectives/all_gather_matmul.py +741 -0
  132. tpu_inference/kernels/collectives/all_gather_matmul_tuned_block_sizes.py +65 -0
  133. tpu_inference/kernels/collectives/util.py +47 -0
  134. tpu_inference/kernels/flash_attention/__init__.py +13 -0
  135. tpu_inference/kernels/flash_attention/kernel.py +772 -0
  136. tpu_inference/kernels/fused_moe/__init__.py +13 -0
  137. tpu_inference/kernels/fused_moe/v1/__init__.py +13 -0
  138. tpu_inference/kernels/fused_moe/v1/kernel.py +1612 -0
  139. tpu_inference/kernels/megablox/__init__.py +13 -0
  140. tpu_inference/kernels/megablox/common.py +54 -0
  141. tpu_inference/kernels/megablox/gmm.py +646 -0
  142. tpu_inference/kernels/mla/__init__.py +13 -0
  143. tpu_inference/kernels/mla/v1/__init__.py +13 -0
  144. tpu_inference/kernels/mla/v1/kernel.py +1340 -0
  145. tpu_inference/kernels/quantized_matmul/__init__.py +13 -0
  146. tpu_inference/kernels/quantized_matmul/kernel.py +456 -0
  147. tpu_inference/kernels/quantized_matmul/tuned_block_sizes.py +609 -0
  148. tpu_inference/kernels/quantized_matmul/util.py +58 -0
  149. tpu_inference/kernels/ragged_paged_attention/__init__.py +13 -0
  150. tpu_inference/kernels/ragged_paged_attention/v2/__init__.py +13 -0
  151. tpu_inference/kernels/ragged_paged_attention/v2/kernel.py +876 -0
  152. tpu_inference/kernels/ragged_paged_attention/v2/ragged_kv_cache_update.py +288 -0
  153. tpu_inference/kernels/ragged_paged_attention/v2/tuned_block_sizes.py +1482 -0
  154. tpu_inference/kernels/ragged_paged_attention/v3/__init__.py +13 -0
  155. tpu_inference/kernels/ragged_paged_attention/v3/kernel.py +1594 -0
  156. tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py +1586 -0
  157. tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes.py +4460 -0
  158. tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes_hd64.py +548 -0
  159. tpu_inference/kernels/ragged_paged_attention/v3/util.py +65 -0
  160. tpu_inference/layers/__init__.py +13 -0
  161. tpu_inference/layers/common/__init__.py +13 -0
  162. tpu_inference/layers/common/attention_interface.py +403 -0
  163. tpu_inference/layers/common/attention_metadata.py +48 -0
  164. tpu_inference/layers/common/binary_search.py +295 -0
  165. tpu_inference/layers/common/quant_methods.py +23 -0
  166. tpu_inference/layers/common/quantization.py +270 -0
  167. tpu_inference/layers/common/sharding.py +600 -0
  168. tpu_inference/layers/jax/__init__.py +13 -0
  169. tpu_inference/layers/jax/attention/__init__.py +13 -0
  170. tpu_inference/layers/jax/attention/attention.py +268 -0
  171. tpu_inference/layers/jax/attention/deepseek_v3_attention.py +547 -0
  172. tpu_inference/layers/jax/attention/gpt_oss_attention.py +275 -0
  173. tpu_inference/layers/jax/attention/llama4_attention.py +167 -0
  174. tpu_inference/layers/jax/base.py +165 -0
  175. tpu_inference/layers/jax/constants.py +101 -0
  176. tpu_inference/layers/jax/layers.py +315 -0
  177. tpu_inference/layers/jax/misc.py +30 -0
  178. tpu_inference/layers/jax/moe/__init__.py +13 -0
  179. tpu_inference/layers/jax/moe/deepseek_v3_moe.py +615 -0
  180. tpu_inference/layers/jax/moe/gpt_oss_moe.py +199 -0
  181. tpu_inference/layers/jax/moe/moe.py +249 -0
  182. tpu_inference/layers/jax/pp_utils.py +53 -0
  183. tpu_inference/layers/jax/rope.py +294 -0
  184. tpu_inference/layers/jax/rope_interface.py +228 -0
  185. tpu_inference/layers/jax/sample/__init__.py +13 -0
  186. tpu_inference/layers/jax/sample/rejection_sampler.py +528 -0
  187. tpu_inference/layers/jax/sample/sampling.py +110 -0
  188. tpu_inference/layers/jax/sample/sampling_metadata.py +90 -0
  189. tpu_inference/layers/jax/transformer_block.py +121 -0
  190. tpu_inference/layers/vllm/__init__.py +13 -0
  191. tpu_inference/layers/vllm/attention.py +221 -0
  192. tpu_inference/layers/vllm/fused_moe.py +502 -0
  193. tpu_inference/layers/vllm/linear_common.py +221 -0
  194. tpu_inference/layers/vllm/quantization/__init__.py +55 -0
  195. tpu_inference/layers/vllm/quantization/awq.py +221 -0
  196. tpu_inference/layers/vllm/quantization/common.py +124 -0
  197. tpu_inference/layers/vllm/quantization/compressed_tensors/__init__.py +13 -0
  198. tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py +135 -0
  199. tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors_moe.py +266 -0
  200. tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/__init__.py +13 -0
  201. tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py +222 -0
  202. tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py +150 -0
  203. tpu_inference/layers/vllm/quantization/fp8.py +118 -0
  204. tpu_inference/layers/vllm/quantization/mxfp4.py +396 -0
  205. tpu_inference/layers/vllm/quantization/unquantized.py +416 -0
  206. tpu_inference/layers/vllm/sharding.py +244 -0
  207. tpu_inference/logger.py +10 -0
  208. tpu_inference/lora/__init__.py +13 -0
  209. tpu_inference/lora/torch_lora_ops.py +98 -0
  210. tpu_inference/lora/torch_punica_tpu.py +310 -0
  211. tpu_inference/models/__init__.py +13 -0
  212. tpu_inference/models/common/__init__.py +13 -0
  213. tpu_inference/models/common/model_loader.py +520 -0
  214. tpu_inference/models/jax/__init__.py +13 -0
  215. tpu_inference/models/jax/deepseek_v3.py +978 -0
  216. tpu_inference/models/jax/gpt_oss.py +508 -0
  217. tpu_inference/models/jax/jax_intermediate_tensor.py +93 -0
  218. tpu_inference/models/jax/llama3.py +436 -0
  219. tpu_inference/models/jax/llama4.py +643 -0
  220. tpu_inference/models/jax/llama_eagle3.py +350 -0
  221. tpu_inference/models/jax/llama_guard_4.py +375 -0
  222. tpu_inference/models/jax/qwen2.py +390 -0
  223. tpu_inference/models/jax/qwen2_5_vl.py +1232 -0
  224. tpu_inference/models/jax/qwen3.py +318 -0
  225. tpu_inference/models/jax/utils/__init__.py +13 -0
  226. tpu_inference/models/jax/utils/file_utils.py +110 -0
  227. tpu_inference/models/jax/utils/multi_modal_utils.py +177 -0
  228. tpu_inference/models/jax/utils/qwix/__init__.py +13 -0
  229. tpu_inference/models/jax/utils/qwix/qwix_utils.py +713 -0
  230. tpu_inference/models/jax/utils/weight_utils.py +621 -0
  231. tpu_inference/models/vllm/__init__.py +13 -0
  232. tpu_inference/models/vllm/vllm_model_wrapper.py +307 -0
  233. tpu_inference/models/vllm/vllm_model_wrapper_context.py +59 -0
  234. tpu_inference/platforms/__init__.py +16 -0
  235. tpu_inference/platforms/tpu_platform.py +258 -0
  236. tpu_inference/runner/__init__.py +13 -0
  237. tpu_inference/runner/block_table.py +122 -0
  238. tpu_inference/runner/compilation_manager.py +890 -0
  239. tpu_inference/runner/input_batch.py +435 -0
  240. tpu_inference/runner/kv_cache.py +166 -0
  241. tpu_inference/runner/kv_cache_manager.py +508 -0
  242. tpu_inference/runner/lora_utils.py +106 -0
  243. tpu_inference/runner/multimodal_manager.py +231 -0
  244. tpu_inference/runner/persistent_batch_manager.py +296 -0
  245. tpu_inference/runner/speculative_decoding_manager.py +262 -0
  246. tpu_inference/runner/structured_decoding_manager.py +101 -0
  247. tpu_inference/runner/tpu_runner.py +1768 -0
  248. tpu_inference/runner/utils.py +426 -0
  249. tpu_inference/spec_decode/__init__.py +13 -0
  250. tpu_inference/spec_decode/jax/__init__.py +13 -0
  251. tpu_inference/spec_decode/jax/eagle3.py +430 -0
  252. tpu_inference/tpu_info.py +92 -0
  253. tpu_inference/utils.py +345 -0
  254. tpu_inference/worker/__init__.py +13 -0
  255. tpu_inference/worker/tpu_worker.py +468 -0
  256. tpu_inference-0.12.0.dev20251222.dist-info/METADATA +106 -0
  257. tpu_inference-0.12.0.dev20251222.dist-info/RECORD +260 -0
  258. tpu_inference-0.12.0.dev20251222.dist-info/WHEEL +5 -0
  259. tpu_inference-0.12.0.dev20251222.dist-info/licenses/LICENSE +201 -0
  260. tpu_inference-0.12.0.dev20251222.dist-info/top_level.txt +2 -0
@@ -0,0 +1,4460 @@
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Auto-tuned block sizes for ragged paged attention."""
15
+
16
+ import jax.numpy as jnp
17
+
18
+ from tpu_inference.kernels.ragged_paged_attention.v3.util import (
19
+ align_to, get_dtype_packing, get_tpu_version, next_power_of_2)
20
+ from tpu_inference.logger import init_logger
21
+ from tpu_inference.utils import get_device_name
22
+
23
+ logger = init_logger(__name__)
24
+
25
+ # key
26
+ # - device_name
27
+ # - page_size
28
+ # - q_{q_dtype_name}_kv_{kv_dtype_name}
29
+ # - q_head-{num_q_heads}_kv_head-{num_kv_heads}_head-{head_dim}
30
+ # - max_model_len-{max_model_len}-sw-{sliding_window}
31
+ # value:
32
+ # - (num_kv_pages_per_block, num_queries_per_block)
33
+ TUNED_BLOCK_SIZES = {
34
+ 'TPU v7': {
35
+ 128: {
36
+ 'q_bfloat16_kv_float8_e4m3fn': {
37
+ 'q_head-8_kv_head-4_head-256': {
38
+ 'max_model_len-2048-sw-None': (16, 32),
39
+ 'max_model_len-4096-sw-None': (32, 32),
40
+ 'max_model_len-128-sw-None': (1, 32),
41
+ 'max_model_len-8192-sw-None': (32, 64),
42
+ 'max_model_len-256-sw-None': (2, 32),
43
+ 'max_model_len-512-sw-None': (4, 32),
44
+ 'max_model_len-1024-sw-None': (8, 32),
45
+ },
46
+ 'q_head-16_kv_head-4_head-128': {
47
+ 'max_model_len-256-sw-None': (2, 32),
48
+ 'max_model_len-512-sw-None': (4, 32),
49
+ 'max_model_len-1024-sw-None': (8, 32),
50
+ 'max_model_len-2048-sw-None': (16, 32),
51
+ 'max_model_len-4096-sw-None': (32, 32),
52
+ 'max_model_len-8192-sw-None': (32, 32),
53
+ 'max_model_len-128-sw-None': (1, 32),
54
+ },
55
+ 'q_head-32_kv_head-16_head-256': {
56
+ 'max_model_len-4096-sw-None': (8, 16),
57
+ 'max_model_len-8192-sw-None': (8, 16),
58
+ 'max_model_len-128-sw-None': (1, 16),
59
+ 'max_model_len-256-sw-None': (2, 16),
60
+ 'max_model_len-512-sw-None': (4, 16),
61
+ 'max_model_len-1024-sw-None': (8, 16),
62
+ 'max_model_len-2048-sw-None': (8, 16),
63
+ },
64
+ 'q_head-32_kv_head-2_head-256': {
65
+ 'max_model_len-1024-sw-None': (8, 16),
66
+ 'max_model_len-2048-sw-None': (16, 16),
67
+ 'max_model_len-4096-sw-None': (32, 16),
68
+ 'max_model_len-8192-sw-None': (32, 32),
69
+ 'max_model_len-128-sw-None': (1, 16),
70
+ 'max_model_len-256-sw-None': (2, 16),
71
+ 'max_model_len-512-sw-None': (4, 16),
72
+ },
73
+ 'q_head-64_kv_head-2_head-128': {
74
+ 'max_model_len-4096-sw-None': (32, 16),
75
+ 'max_model_len-8192-sw-None': (32, 16),
76
+ 'max_model_len-128-sw-None': (1, 16),
77
+ 'max_model_len-256-sw-None': (2, 16),
78
+ 'max_model_len-512-sw-None': (4, 16),
79
+ 'max_model_len-1024-sw-None': (8, 16),
80
+ 'max_model_len-2048-sw-None': (16, 16),
81
+ },
82
+ 'q_head-64_kv_head-16_head-128': {
83
+ 'max_model_len-256-sw-None': (2, 16),
84
+ 'max_model_len-512-sw-None': (4, 16),
85
+ 'max_model_len-1024-sw-None': (8, 16),
86
+ 'max_model_len-2048-sw-None': (16, 16),
87
+ 'max_model_len-4096-sw-None': (16, 16),
88
+ 'max_model_len-8192-sw-None': (32, 8),
89
+ 'max_model_len-128-sw-None': (1, 16),
90
+ },
91
+ 'q_head-128_kv_head-8_head-256': {
92
+ 'max_model_len-1024-sw-None': (8, 8),
93
+ 'max_model_len-2048-sw-None': (16, 8),
94
+ 'max_model_len-4096-sw-None': (16, 8),
95
+ 'max_model_len-8192-sw-None': (16, 8),
96
+ 'max_model_len-128-sw-None': (1, 8),
97
+ 'max_model_len-256-sw-None': (2, 8),
98
+ 'max_model_len-512-sw-None': (4, 8),
99
+ },
100
+ 'q_head-4_kv_head-2_head-256': {
101
+ 'max_model_len-1024-sw-None': (8, 32),
102
+ 'max_model_len-2048-sw-None': (16, 32),
103
+ 'max_model_len-4096-sw-None': (32, 32),
104
+ 'max_model_len-8192-sw-None': (32, 128),
105
+ 'max_model_len-256-sw-None': (2, 32),
106
+ 'max_model_len-128-sw-None': (1, 64),
107
+ 'max_model_len-512-sw-None': (4, 64),
108
+ },
109
+ 'q_head-128_kv_head-2_head-128': {
110
+ 'max_model_len-128-sw-None': (1, 8),
111
+ 'max_model_len-256-sw-None': (2, 8),
112
+ 'max_model_len-512-sw-None': (4, 8),
113
+ 'max_model_len-1024-sw-None': (8, 8),
114
+ 'max_model_len-2048-sw-None': (16, 16),
115
+ 'max_model_len-4096-sw-None': (16, 16),
116
+ 'max_model_len-8192-sw-None': (16, 16),
117
+ },
118
+ 'q_head-64_kv_head-2_head-256': {
119
+ 'max_model_len-128-sw-None': (1, 8),
120
+ 'max_model_len-256-sw-None': (2, 8),
121
+ 'max_model_len-512-sw-None': (4, 8),
122
+ 'max_model_len-1024-sw-None': (8, 8),
123
+ 'max_model_len-2048-sw-None': (16, 16),
124
+ 'max_model_len-4096-sw-None': (32, 8),
125
+ 'max_model_len-8192-sw-None': (32, 8),
126
+ },
127
+ 'q_head-128_kv_head-16_head-128': {
128
+ 'max_model_len-128-sw-None': (1, 8),
129
+ 'max_model_len-256-sw-None': (2, 8),
130
+ 'max_model_len-512-sw-None': (4, 8),
131
+ 'max_model_len-1024-sw-None': (8, 8),
132
+ 'max_model_len-2048-sw-None': (16, 8),
133
+ 'max_model_len-4096-sw-None': (16, 8),
134
+ 'max_model_len-8192-sw-None': (16, 8),
135
+ },
136
+ 'q_head-32_kv_head-4_head-128': {
137
+ 'max_model_len-128-sw-None': (1, 32),
138
+ 'max_model_len-256-sw-None': (2, 32),
139
+ 'max_model_len-512-sw-None': (4, 32),
140
+ 'max_model_len-1024-sw-None': (8, 32),
141
+ 'max_model_len-2048-sw-None': (16, 32),
142
+ 'max_model_len-4096-sw-None': (32, 32),
143
+ 'max_model_len-8192-sw-None': (32, 32),
144
+ },
145
+ 'q_head-8_kv_head-4_head-128': {
146
+ 'max_model_len-128-sw-None': (1, 64),
147
+ 'max_model_len-256-sw-None': (2, 64),
148
+ 'max_model_len-512-sw-None': (4, 64),
149
+ 'max_model_len-1024-sw-None': (8, 64),
150
+ 'max_model_len-2048-sw-None': (16, 64),
151
+ 'max_model_len-4096-sw-None': (32, 128),
152
+ 'max_model_len-8192-sw-None': (32, 128),
153
+ },
154
+ 'q_head-8_kv_head-2_head-128': {
155
+ 'max_model_len-128-sw-None': (1, 128),
156
+ 'max_model_len-256-sw-None': (2, 64),
157
+ 'max_model_len-512-sw-None': (4, 32),
158
+ 'max_model_len-1024-sw-None': (8, 64),
159
+ 'max_model_len-2048-sw-None': (16, 32),
160
+ 'max_model_len-4096-sw-None': (32, 32),
161
+ 'max_model_len-8192-sw-None': (32, 32),
162
+ },
163
+ 'q_head-64_kv_head-16_head-256': {
164
+ 'max_model_len-128-sw-None': (1, 8),
165
+ 'max_model_len-256-sw-None': (2, 8),
166
+ 'max_model_len-512-sw-None': (4, 8),
167
+ 'max_model_len-1024-sw-None': (8, 8),
168
+ 'max_model_len-2048-sw-None': (8, 8),
169
+ 'max_model_len-4096-sw-None': (8, 8),
170
+ 'max_model_len-8192-sw-None': (8, 8),
171
+ },
172
+ 'q_head-2_kv_head-2_head-128': {
173
+ 'max_model_len-128-sw-None': (1, 128),
174
+ 'max_model_len-256-sw-None': (2, 128),
175
+ 'max_model_len-512-sw-None': (4, 128),
176
+ 'max_model_len-1024-sw-None': (8, 128),
177
+ 'max_model_len-2048-sw-None': (16, 64),
178
+ 'max_model_len-4096-sw-None': (32, 128),
179
+ 'max_model_len-8192-sw-None': (32, 128),
180
+ },
181
+ 'q_head-16_kv_head-4_head-256': {
182
+ 'max_model_len-128-sw-None': (1, 16),
183
+ 'max_model_len-256-sw-None': (2, 32),
184
+ 'max_model_len-512-sw-None': (4, 32),
185
+ 'max_model_len-1024-sw-None': (8, 32),
186
+ 'max_model_len-2048-sw-None': (16, 32),
187
+ 'max_model_len-4096-sw-None': (32, 32),
188
+ 'max_model_len-8192-sw-None': (32, 32),
189
+ },
190
+ 'q_head-64_kv_head-32_head-128': {
191
+ 'max_model_len-128-sw-None': (1, 8),
192
+ 'max_model_len-256-sw-None': (2, 8),
193
+ 'max_model_len-512-sw-None': (4, 8),
194
+ 'max_model_len-1024-sw-None': (8, 8),
195
+ 'max_model_len-2048-sw-None': (8, 8),
196
+ 'max_model_len-4096-sw-None': (8, 8),
197
+ 'max_model_len-8192-sw-None': (8, 8),
198
+ },
199
+ 'q_head-16_kv_head-2_head-256': {
200
+ 'max_model_len-256-sw-None': (2, 32),
201
+ 'max_model_len-512-sw-None': (4, 32),
202
+ 'max_model_len-128-sw-None': (1, 32),
203
+ 'max_model_len-1024-sw-None': (8, 32),
204
+ 'max_model_len-2048-sw-None': (16, 32),
205
+ 'max_model_len-4096-sw-None': (32, 32),
206
+ 'max_model_len-8192-sw-None': (32, 32),
207
+ },
208
+ 'q_head-32_kv_head-16_head-128': {
209
+ 'max_model_len-4096-sw-None': (32, 16),
210
+ 'max_model_len-8192-sw-None': (32, 16),
211
+ 'max_model_len-128-sw-None': (1, 32),
212
+ 'max_model_len-256-sw-None': (2, 32),
213
+ 'max_model_len-512-sw-None': (4, 32),
214
+ 'max_model_len-1024-sw-None': (8, 32),
215
+ 'max_model_len-2048-sw-None': (16, 32),
216
+ },
217
+ 'q_head-32_kv_head-2_head-128': {
218
+ 'max_model_len-1024-sw-None': (8, 32),
219
+ 'max_model_len-2048-sw-None': (16, 32),
220
+ 'max_model_len-128-sw-None': (1, 32),
221
+ 'max_model_len-256-sw-None': (2, 32),
222
+ 'max_model_len-512-sw-None': (4, 32),
223
+ 'max_model_len-4096-sw-None': (32, 32),
224
+ 'max_model_len-8192-sw-None': (32, 32),
225
+ },
226
+ 'q_head-64_kv_head-8_head-256': {
227
+ 'max_model_len-256-sw-None': (2, 16),
228
+ 'max_model_len-512-sw-None': (4, 16),
229
+ 'max_model_len-1024-sw-None': (8, 16),
230
+ 'max_model_len-2048-sw-None': (16, 16),
231
+ 'max_model_len-128-sw-None': (1, 8),
232
+ 'max_model_len-4096-sw-None': (16, 16),
233
+ 'max_model_len-8192-sw-None': (16, 16),
234
+ },
235
+ 'q_head-128_kv_head-8_head-128': {
236
+ 'max_model_len-2048-sw-None': (16, 16),
237
+ 'max_model_len-4096-sw-None': (16, 16),
238
+ 'max_model_len-8192-sw-None': (16, 16),
239
+ 'max_model_len-128-sw-None': (1, 8),
240
+ 'max_model_len-256-sw-None': (2, 8),
241
+ 'max_model_len-512-sw-None': (4, 16),
242
+ 'max_model_len-1024-sw-None': (8, 16),
243
+ },
244
+ 'q_head-128_kv_head-2_head-256': {
245
+ 'max_model_len-128-sw-None': (1, 8),
246
+ 'max_model_len-256-sw-None': (2, 8),
247
+ 'max_model_len-512-sw-None': (4, 8),
248
+ 'max_model_len-1024-sw-None': (8, 8),
249
+ 'max_model_len-2048-sw-None': (16, 8),
250
+ 'max_model_len-4096-sw-None': (32, 8),
251
+ 'max_model_len-8192-sw-None': (32, 8),
252
+ },
253
+ 'q_head-16_kv_head-8_head-128': {
254
+ 'max_model_len-128-sw-None': (1, 64),
255
+ 'max_model_len-256-sw-None': (2, 32),
256
+ 'max_model_len-512-sw-None': (4, 32),
257
+ 'max_model_len-1024-sw-None': (8, 32),
258
+ 'max_model_len-2048-sw-None': (16, 32),
259
+ 'max_model_len-4096-sw-None': (32, 32),
260
+ 'max_model_len-8192-sw-None': (32, 64),
261
+ },
262
+ 'q_head-64_kv_head-4_head-128': {
263
+ 'max_model_len-128-sw-None': (1, 16),
264
+ 'max_model_len-256-sw-None': (2, 16),
265
+ 'max_model_len-512-sw-None': (4, 16),
266
+ 'max_model_len-1024-sw-None': (8, 16),
267
+ 'max_model_len-2048-sw-None': (16, 16),
268
+ 'max_model_len-4096-sw-None': (32, 16),
269
+ 'max_model_len-8192-sw-None': (32, 32),
270
+ },
271
+ 'q_head-16_kv_head-2_head-128': {
272
+ 'max_model_len-128-sw-None': (1, 32),
273
+ 'max_model_len-256-sw-None': (2, 32),
274
+ 'max_model_len-512-sw-None': (4, 32),
275
+ 'max_model_len-1024-sw-None': (8, 32),
276
+ 'max_model_len-2048-sw-None': (16, 32),
277
+ 'max_model_len-4096-sw-None': (32, 32),
278
+ 'max_model_len-8192-sw-None': (32, 32),
279
+ },
280
+ 'q_head-32_kv_head-4_head-256': {
281
+ 'max_model_len-128-sw-None': (1, 16),
282
+ 'max_model_len-256-sw-None': (2, 16),
283
+ 'max_model_len-512-sw-None': (4, 16),
284
+ 'max_model_len-1024-sw-None': (8, 16),
285
+ 'max_model_len-2048-sw-None': (16, 16),
286
+ 'max_model_len-4096-sw-None': (32, 32),
287
+ 'max_model_len-8192-sw-None': (32, 32),
288
+ },
289
+ 'q_head-8_kv_head-2_head-256': {
290
+ 'max_model_len-128-sw-None': (1, 32),
291
+ 'max_model_len-256-sw-None': (2, 32),
292
+ 'max_model_len-512-sw-None': (4, 32),
293
+ 'max_model_len-1024-sw-None': (8, 32),
294
+ 'max_model_len-2048-sw-None': (16, 32),
295
+ 'max_model_len-4096-sw-None': (32, 32),
296
+ 'max_model_len-8192-sw-None': (32, 32),
297
+ },
298
+ 'q_head-2_kv_head-2_head-256': {
299
+ 'max_model_len-128-sw-None': (1, 64),
300
+ 'max_model_len-256-sw-None': (2, 32),
301
+ 'max_model_len-512-sw-None': (4, 32),
302
+ 'max_model_len-1024-sw-None': (8, 32),
303
+ 'max_model_len-2048-sw-None': (16, 32),
304
+ 'max_model_len-4096-sw-None': (32, 32),
305
+ 'max_model_len-8192-sw-None': (32, 128),
306
+ },
307
+ 'q_head-128_kv_head-4_head-128': {
308
+ 'max_model_len-128-sw-None': (1, 8),
309
+ 'max_model_len-256-sw-None': (2, 8),
310
+ 'max_model_len-512-sw-None': (4, 8),
311
+ 'max_model_len-1024-sw-None': (8, 8),
312
+ 'max_model_len-2048-sw-None': (16, 8),
313
+ 'max_model_len-4096-sw-None': (16, 16),
314
+ 'max_model_len-8192-sw-None': (16, 16),
315
+ },
316
+ 'q_head-4_kv_head-2_head-128': {
317
+ 'max_model_len-1024-sw-None': (8, 128),
318
+ 'max_model_len-128-sw-None': (1, 128),
319
+ 'max_model_len-256-sw-None': (2, 128),
320
+ 'max_model_len-512-sw-None': (4, 128),
321
+ 'max_model_len-2048-sw-None': (16, 128),
322
+ 'max_model_len-4096-sw-None': (32, 128),
323
+ 'max_model_len-8192-sw-None': (32, 128),
324
+ },
325
+ 'q_head-16_kv_head-8_head-256': {
326
+ 'max_model_len-128-sw-None': (1, 32),
327
+ 'max_model_len-256-sw-None': (2, 32),
328
+ 'max_model_len-512-sw-None': (4, 32),
329
+ 'max_model_len-1024-sw-None': (8, 32),
330
+ 'max_model_len-2048-sw-None': (16, 32),
331
+ 'max_model_len-4096-sw-None': (32, 32),
332
+ 'max_model_len-8192-sw-None': (32, 32),
333
+ },
334
+ 'q_head-64_kv_head-4_head-256': {
335
+ 'max_model_len-128-sw-None': (1, 8),
336
+ 'max_model_len-256-sw-None': (2, 8),
337
+ 'max_model_len-512-sw-None': (4, 8),
338
+ 'max_model_len-1024-sw-None': (8, 8),
339
+ 'max_model_len-2048-sw-None': (16, 16),
340
+ 'max_model_len-4096-sw-None': (32, 8),
341
+ 'max_model_len-8192-sw-None': (32, 16),
342
+ },
343
+ 'q_head-32_kv_head-8_head-128': {
344
+ 'max_model_len-128-sw-None': (1, 32),
345
+ 'max_model_len-256-sw-None': (2, 32),
346
+ 'max_model_len-512-sw-None': (4, 32),
347
+ 'max_model_len-1024-sw-None': (8, 32),
348
+ 'max_model_len-2048-sw-None': (16, 32),
349
+ 'max_model_len-4096-sw-None': (32, 32),
350
+ 'max_model_len-8192-sw-None': (32, 32),
351
+ },
352
+ 'q_head-128_kv_head-4_head-256': {
353
+ 'max_model_len-128-sw-None': (1, 8),
354
+ 'max_model_len-256-sw-None': (2, 8),
355
+ 'max_model_len-512-sw-None': (4, 8),
356
+ 'max_model_len-1024-sw-None': (8, 8),
357
+ 'max_model_len-2048-sw-None': (16, 8),
358
+ 'max_model_len-4096-sw-None': (16, 16),
359
+ 'max_model_len-8192-sw-None': (16, 16),
360
+ },
361
+ 'q_head-64_kv_head-8_head-128': {
362
+ 'max_model_len-128-sw-None': (1, 16),
363
+ 'max_model_len-256-sw-None': (2, 16),
364
+ 'max_model_len-512-sw-None': (4, 16),
365
+ 'max_model_len-1024-sw-None': (8, 32),
366
+ 'max_model_len-2048-sw-None': (16, 32),
367
+ 'max_model_len-4096-sw-None': (32, 16),
368
+ 'max_model_len-8192-sw-None': (32, 16),
369
+ },
370
+ 'q_head-32_kv_head-8_head-256': {
371
+ 'max_model_len-128-sw-None': (1, 16),
372
+ 'max_model_len-256-sw-None': (2, 16),
373
+ 'max_model_len-512-sw-None': (4, 16),
374
+ 'max_model_len-1024-sw-None': (8, 32),
375
+ 'max_model_len-2048-sw-None': (16, 32),
376
+ 'max_model_len-4096-sw-None': (16, 32),
377
+ 'max_model_len-8192-sw-None': (16, 32),
378
+ },
379
+ },
380
+ 'q_bfloat16_kv_bfloat16': {
381
+ 'q_head-8_kv_head-2_head-128': {
382
+ 'max_model_len-8192-sw-None': (32, 32),
383
+ 'max_model_len-128-sw-None': (1, 128),
384
+ 'max_model_len-512-sw-None': (4, 32),
385
+ 'max_model_len-256-sw-None': (2, 128),
386
+ 'max_model_len-1024-sw-None': (8, 64),
387
+ 'max_model_len-2048-sw-None': (16, 32),
388
+ 'max_model_len-4096-sw-None': (32, 32),
389
+ },
390
+ 'q_head-16_kv_head-2_head-128': {
391
+ 'max_model_len-128-sw-None': (1, 64),
392
+ 'max_model_len-256-sw-None': (2, 64),
393
+ 'max_model_len-512-sw-None': (4, 64),
394
+ 'max_model_len-1024-sw-None': (8, 32),
395
+ 'max_model_len-2048-sw-None': (16, 32),
396
+ 'max_model_len-4096-sw-None': (32, 32),
397
+ 'max_model_len-8192-sw-None': (32, 32),
398
+ },
399
+ 'q_head-16_kv_head-8_head-256': {
400
+ 'max_model_len-8192-sw-None': (16, 64),
401
+ 'max_model_len-2048-sw-None': (16, 32),
402
+ 'max_model_len-4096-sw-None': (16, 64),
403
+ 'max_model_len-128-sw-None': (1, 32),
404
+ 'max_model_len-256-sw-None': (2, 32),
405
+ 'max_model_len-512-sw-None': (4, 32),
406
+ 'max_model_len-1024-sw-None': (8, 32),
407
+ },
408
+ 'q_head-32_kv_head-1_head-256': {
409
+ 'max_model_len-1024-sw-None': (8, 16),
410
+ 'max_model_len-2048-sw-None': (16, 16),
411
+ 'max_model_len-4096-sw-None': (32, 16),
412
+ 'max_model_len-8192-sw-None': (32, 32),
413
+ 'max_model_len-128-sw-None': (1, 16),
414
+ 'max_model_len-256-sw-None': (2, 16),
415
+ 'max_model_len-512-sw-None': (4, 16),
416
+ },
417
+ 'q_head-32_kv_head-8_head-256': {
418
+ 'max_model_len-128-sw-None': (1, 32),
419
+ 'max_model_len-256-sw-None': (2, 16),
420
+ 'max_model_len-512-sw-None': (4, 32),
421
+ 'max_model_len-1024-sw-None': (8, 32),
422
+ 'max_model_len-2048-sw-None': (8, 32),
423
+ 'max_model_len-4096-sw-None': (8, 32),
424
+ 'max_model_len-8192-sw-None': (8, 32),
425
+ },
426
+ 'q_head-64_kv_head-1_head-128': {
427
+ 'max_model_len-4096-sw-None': (32, 16),
428
+ 'max_model_len-8192-sw-None': (32, 16),
429
+ 'max_model_len-128-sw-None': (1, 16),
430
+ 'max_model_len-256-sw-None': (2, 16),
431
+ 'max_model_len-512-sw-None': (4, 16),
432
+ 'max_model_len-1024-sw-None': (8, 16),
433
+ 'max_model_len-2048-sw-None': (16, 16),
434
+ },
435
+ 'q_head-64_kv_head-8_head-128': {
436
+ 'max_model_len-512-sw-None': (4, 16),
437
+ 'max_model_len-1024-sw-None': (8, 32),
438
+ 'max_model_len-2048-sw-None': (16, 32),
439
+ 'max_model_len-4096-sw-None': (16, 32),
440
+ 'max_model_len-8192-sw-None': (16, 32),
441
+ 'max_model_len-128-sw-None': (1, 32),
442
+ 'max_model_len-256-sw-None': (2, 16),
443
+ },
444
+ 'q_head-128_kv_head-4_head-256': {
445
+ 'max_model_len-2048-sw-None': (16, 8),
446
+ 'max_model_len-4096-sw-None': (32, 8),
447
+ 'max_model_len-8192-sw-None': (16, 16),
448
+ 'max_model_len-128-sw-None': (1, 8),
449
+ 'max_model_len-256-sw-None': (2, 8),
450
+ 'max_model_len-512-sw-None': (4, 8),
451
+ 'max_model_len-1024-sw-None': (8, 8),
452
+ },
453
+ 'q_head-128_kv_head-1_head-128': {
454
+ 'max_model_len-128-sw-None': (1, 8),
455
+ 'max_model_len-256-sw-None': (2, 8),
456
+ 'max_model_len-512-sw-None': (4, 8),
457
+ 'max_model_len-1024-sw-None': (8, 8),
458
+ 'max_model_len-2048-sw-None': (16, 16),
459
+ 'max_model_len-4096-sw-None': (32, 8),
460
+ 'max_model_len-8192-sw-None': (32, 8),
461
+ },
462
+ 'q_head-8_kv_head-2_head-256': {
463
+ 'max_model_len-128-sw-None': (1, 32),
464
+ 'max_model_len-256-sw-None': (2, 32),
465
+ 'max_model_len-512-sw-None': (4, 32),
466
+ 'max_model_len-1024-sw-None': (8, 32),
467
+ 'max_model_len-2048-sw-None': (16, 32),
468
+ 'max_model_len-4096-sw-None': (32, 32),
469
+ 'max_model_len-8192-sw-None': (32, 32),
470
+ },
471
+ 'q_head-32_kv_head-16_head-128': {
472
+ 'max_model_len-128-sw-None': (1, 32),
473
+ 'max_model_len-256-sw-None': (2, 32),
474
+ 'max_model_len-512-sw-None': (4, 32),
475
+ 'max_model_len-1024-sw-None': (8, 32),
476
+ 'max_model_len-2048-sw-None': (8, 32),
477
+ 'max_model_len-4096-sw-None': (8, 32),
478
+ 'max_model_len-8192-sw-None': (8, 32),
479
+ },
480
+ 'q_head-2_kv_head-1_head-256': {
481
+ 'max_model_len-128-sw-None': (1, 128),
482
+ 'max_model_len-512-sw-None': (4, 64),
483
+ 'max_model_len-4096-sw-None': (32, 128),
484
+ 'max_model_len-256-sw-None': (2, 64),
485
+ 'max_model_len-1024-sw-None': (8, 64),
486
+ 'max_model_len-8192-sw-None': (32, 128),
487
+ 'max_model_len-2048-sw-None': (16, 64),
488
+ },
489
+ 'q_head-2_kv_head-1_head-128': {
490
+ 'max_model_len-512-sw-None': (4, 128),
491
+ 'max_model_len-256-sw-None': (2, 128),
492
+ 'max_model_len-2048-sw-None': (16, 128),
493
+ 'max_model_len-4096-sw-None': (32, 128),
494
+ 'max_model_len-128-sw-None': (1, 256),
495
+ 'max_model_len-8192-sw-None': (32, 128),
496
+ 'max_model_len-1024-sw-None': (8, 128),
497
+ },
498
+ 'q_head-8_kv_head-1_head-256': {
499
+ 'max_model_len-512-sw-None': (4, 32),
500
+ 'max_model_len-1024-sw-None': (8, 32),
501
+ 'max_model_len-256-sw-None': (2, 32),
502
+ 'max_model_len-2048-sw-None': (16, 32),
503
+ 'max_model_len-128-sw-None': (1, 32),
504
+ 'max_model_len-4096-sw-None': (32, 32),
505
+ 'max_model_len-8192-sw-None': (32, 32),
506
+ },
507
+ 'q_head-64_kv_head-1_head-256': {
508
+ 'max_model_len-128-sw-None': (1, 8),
509
+ 'max_model_len-256-sw-None': (2, 8),
510
+ 'max_model_len-512-sw-None': (4, 8),
511
+ 'max_model_len-1024-sw-None': (8, 8),
512
+ 'max_model_len-2048-sw-None': (16, 16),
513
+ 'max_model_len-4096-sw-None': (32, 16),
514
+ 'max_model_len-8192-sw-None': (32, 16),
515
+ },
516
+ 'q_head-4_kv_head-2_head-128': {
517
+ 'max_model_len-8192-sw-None': (32, 128),
518
+ 'max_model_len-128-sw-None': (1, 128),
519
+ 'max_model_len-512-sw-None': (4, 128),
520
+ 'max_model_len-4096-sw-None': (32, 128),
521
+ 'max_model_len-256-sw-None': (2, 128),
522
+ 'max_model_len-1024-sw-None': (8, 64),
523
+ 'max_model_len-2048-sw-None': (16, 128),
524
+ },
525
+ 'q_head-4_kv_head-2_head-256': {
526
+ 'max_model_len-256-sw-None': (2, 32),
527
+ 'max_model_len-1024-sw-None': (8, 32),
528
+ 'max_model_len-8192-sw-None': (32, 128),
529
+ 'max_model_len-512-sw-None': (4, 64),
530
+ 'max_model_len-2048-sw-None': (16, 32),
531
+ 'max_model_len-128-sw-None': (1, 64),
532
+ 'max_model_len-4096-sw-None': (32, 128),
533
+ },
534
+ 'q_head-8_kv_head-4_head-128': {
535
+ 'max_model_len-512-sw-None': (4, 128),
536
+ 'max_model_len-128-sw-None': (1, 64),
537
+ 'max_model_len-256-sw-None': (2, 64),
538
+ 'max_model_len-1024-sw-None': (8, 128),
539
+ 'max_model_len-2048-sw-None': (16, 128),
540
+ 'max_model_len-4096-sw-None': (32, 128),
541
+ 'max_model_len-8192-sw-None': (32, 128),
542
+ },
543
+ 'q_head-8_kv_head-4_head-256': {
544
+ 'max_model_len-1024-sw-None': (8, 32),
545
+ 'max_model_len-2048-sw-None': (16, 32),
546
+ 'max_model_len-128-sw-None': (1, 32),
547
+ 'max_model_len-256-sw-None': (2, 32),
548
+ 'max_model_len-512-sw-None': (4, 32),
549
+ 'max_model_len-4096-sw-None': (32, 128),
550
+ 'max_model_len-8192-sw-None': (32, 128),
551
+ },
552
+ 'q_head-8_kv_head-1_head-128': {
553
+ 'max_model_len-256-sw-None': (2, 128),
554
+ 'max_model_len-8192-sw-None': (32, 32),
555
+ 'max_model_len-512-sw-None': (4, 64),
556
+ 'max_model_len-128-sw-None': (1, 64),
557
+ 'max_model_len-1024-sw-None': (8, 32),
558
+ 'max_model_len-2048-sw-None': (16, 32),
559
+ 'max_model_len-4096-sw-None': (32, 32),
560
+ },
561
+ 'q_head-32_kv_head-2_head-128': {
562
+ 'max_model_len-128-sw-None': (1, 32),
563
+ 'max_model_len-256-sw-None': (2, 32),
564
+ 'max_model_len-512-sw-None': (4, 32),
565
+ 'max_model_len-1024-sw-None': (8, 32),
566
+ 'max_model_len-2048-sw-None': (16, 32),
567
+ 'max_model_len-4096-sw-None': (32, 32),
568
+ 'max_model_len-8192-sw-None': (32, 32),
569
+ },
570
+ 'q_head-128_kv_head-8_head-128': {
571
+ 'max_model_len-128-sw-None': (1, 16),
572
+ 'max_model_len-256-sw-None': (2, 8),
573
+ 'max_model_len-512-sw-None': (4, 16),
574
+ 'max_model_len-1024-sw-None': (8, 16),
575
+ 'max_model_len-2048-sw-None': (16, 16),
576
+ 'max_model_len-4096-sw-None': (16, 16),
577
+ 'max_model_len-8192-sw-None': (16, 16),
578
+ },
579
+ 'q_head-64_kv_head-8_head-256': {
580
+ 'max_model_len-128-sw-None': (1, 16),
581
+ 'max_model_len-256-sw-None': (2, 16),
582
+ 'max_model_len-512-sw-None': (4, 16),
583
+ 'max_model_len-1024-sw-None': (8, 16),
584
+ 'max_model_len-2048-sw-None': (8, 16),
585
+ 'max_model_len-4096-sw-None': (8, 16),
586
+ 'max_model_len-8192-sw-None': (8, 16),
587
+ },
588
+ 'q_head-16_kv_head-2_head-256': {
589
+ 'max_model_len-128-sw-None': (1, 32),
590
+ 'max_model_len-256-sw-None': (2, 32),
591
+ 'max_model_len-512-sw-None': (4, 32),
592
+ 'max_model_len-1024-sw-None': (8, 32),
593
+ 'max_model_len-2048-sw-None': (16, 32),
594
+ 'max_model_len-4096-sw-None': (32, 32),
595
+ 'max_model_len-8192-sw-None': (32, 32),
596
+ },
597
+ 'q_head-4_kv_head-1_head-128': {
598
+ 'max_model_len-1024-sw-None': (8, 64),
599
+ 'max_model_len-8192-sw-None': (32, 128),
600
+ 'max_model_len-2048-sw-None': (16, 128),
601
+ 'max_model_len-128-sw-None': (1, 128),
602
+ 'max_model_len-4096-sw-None': (32, 128),
603
+ 'max_model_len-256-sw-None': (2, 128),
604
+ 'max_model_len-512-sw-None': (4, 128),
605
+ },
606
+ 'q_head-16_kv_head-1_head-256': {
607
+ 'max_model_len-256-sw-None': (2, 32),
608
+ 'max_model_len-512-sw-None': (4, 32),
609
+ 'max_model_len-1024-sw-None': (8, 32),
610
+ 'max_model_len-2048-sw-None': (16, 32),
611
+ 'max_model_len-128-sw-None': (1, 32),
612
+ 'max_model_len-4096-sw-None': (32, 32),
613
+ 'max_model_len-8192-sw-None': (32, 32),
614
+ },
615
+ 'q_head-16_kv_head-8_head-128': {
616
+ 'max_model_len-8192-sw-None': (32, 64),
617
+ 'max_model_len-128-sw-None': (1, 32),
618
+ 'max_model_len-256-sw-None': (2, 64),
619
+ 'max_model_len-512-sw-None': (4, 64),
620
+ 'max_model_len-1024-sw-None': (8, 64),
621
+ 'max_model_len-2048-sw-None': (16, 64),
622
+ 'max_model_len-4096-sw-None': (32, 64),
623
+ },
624
+ 'q_head-32_kv_head-1_head-128': {
625
+ 'max_model_len-1024-sw-None': (8, 32),
626
+ 'max_model_len-2048-sw-None': (16, 32),
627
+ 'max_model_len-4096-sw-None': (32, 32),
628
+ 'max_model_len-128-sw-None': (1, 32),
629
+ 'max_model_len-256-sw-None': (2, 32),
630
+ 'max_model_len-8192-sw-None': (32, 32),
631
+ 'max_model_len-512-sw-None': (4, 32),
632
+ },
633
+ 'q_head-32_kv_head-8_head-128': {
634
+ 'max_model_len-128-sw-None': (1, 32),
635
+ 'max_model_len-256-sw-None': (2, 32),
636
+ 'max_model_len-512-sw-None': (4, 32),
637
+ 'max_model_len-1024-sw-None': (8, 32),
638
+ 'max_model_len-2048-sw-None': (16, 32),
639
+ 'max_model_len-4096-sw-None': (32, 32),
640
+ 'max_model_len-8192-sw-None': (32, 32),
641
+ },
642
+ 'q_head-64_kv_head-4_head-256': {
643
+ 'max_model_len-512-sw-None': (4, 16),
644
+ 'max_model_len-1024-sw-None': (8, 16),
645
+ 'max_model_len-128-sw-None': (1, 8),
646
+ 'max_model_len-2048-sw-None': (16, 16),
647
+ 'max_model_len-256-sw-None': (2, 16),
648
+ 'max_model_len-4096-sw-None': (16, 16),
649
+ 'max_model_len-8192-sw-None': (32, 16),
650
+ },
651
+ 'q_head-128_kv_head-4_head-128': {
652
+ 'max_model_len-2048-sw-None': (16, 16),
653
+ 'max_model_len-4096-sw-None': (16, 16),
654
+ 'max_model_len-128-sw-None': (1, 8),
655
+ 'max_model_len-256-sw-None': (2, 8),
656
+ 'max_model_len-512-sw-None': (4, 8),
657
+ 'max_model_len-8192-sw-None': (16, 16),
658
+ 'max_model_len-1024-sw-None': (8, 8),
659
+ },
660
+ 'q_head-4_kv_head-1_head-256': {
661
+ 'max_model_len-2048-sw-None': (16, 32),
662
+ 'max_model_len-128-sw-None': (1, 32),
663
+ 'max_model_len-4096-sw-None': (32, 32),
664
+ 'max_model_len-256-sw-None': (2, 32),
665
+ 'max_model_len-8192-sw-None': (32, 32),
666
+ 'max_model_len-512-sw-None': (4, 32),
667
+ 'max_model_len-1024-sw-None': (8, 32),
668
+ },
669
+ 'q_head-128_kv_head-1_head-256': {
670
+ 'max_model_len-128-sw-None': (1, 8),
671
+ 'max_model_len-256-sw-None': (2, 8),
672
+ 'max_model_len-512-sw-None': (4, 8),
673
+ 'max_model_len-1024-sw-None': (8, 8),
674
+ 'max_model_len-2048-sw-None': (16, 8),
675
+ 'max_model_len-4096-sw-None': (32, 8),
676
+ 'max_model_len-8192-sw-None': (32, 8),
677
+ },
678
+ 'q_head-32_kv_head-16_head-256': {
679
+ 'max_model_len-128-sw-None': (1, 16),
680
+ 'max_model_len-256-sw-None': (2, 16),
681
+ 'max_model_len-512-sw-None': (4, 16),
682
+ 'max_model_len-1024-sw-None': (4, 16),
683
+ 'max_model_len-2048-sw-None': (4, 16),
684
+ 'max_model_len-4096-sw-None': (4, 16),
685
+ 'max_model_len-8192-sw-None': (4, 16),
686
+ },
687
+ 'q_head-64_kv_head-2_head-128': {
688
+ 'max_model_len-128-sw-None': (1, 16),
689
+ 'max_model_len-256-sw-None': (2, 16),
690
+ 'max_model_len-512-sw-None': (4, 16),
691
+ 'max_model_len-1024-sw-None': (8, 16),
692
+ 'max_model_len-2048-sw-None': (16, 16),
693
+ 'max_model_len-4096-sw-None': (32, 16),
694
+ 'max_model_len-8192-sw-None': (32, 16),
695
+ },
696
+ 'q_head-128_kv_head-8_head-256': {
697
+ 'max_model_len-128-sw-None': (1, 8),
698
+ 'max_model_len-256-sw-None': (2, 8),
699
+ 'max_model_len-512-sw-None': (4, 8),
700
+ 'max_model_len-1024-sw-None': (8, 8),
701
+ 'max_model_len-2048-sw-None': (8, 8),
702
+ 'max_model_len-4096-sw-None': (8, 8),
703
+ 'max_model_len-8192-sw-None': (8, 8),
704
+ },
705
+ 'q_head-32_kv_head-2_head-256': {
706
+ 'max_model_len-128-sw-None': (1, 16),
707
+ 'max_model_len-256-sw-None': (2, 16),
708
+ 'max_model_len-512-sw-None': (4, 16),
709
+ 'max_model_len-1024-sw-None': (8, 16),
710
+ 'max_model_len-2048-sw-None': (16, 16),
711
+ 'max_model_len-4096-sw-None': (32, 32),
712
+ 'max_model_len-8192-sw-None': (32, 32),
713
+ },
714
+ 'q_head-64_kv_head-16_head-128': {
715
+ 'max_model_len-128-sw-None': (1, 16),
716
+ 'max_model_len-256-sw-None': (2, 16),
717
+ 'max_model_len-512-sw-None': (4, 16),
718
+ 'max_model_len-1024-sw-None': (8, 16),
719
+ 'max_model_len-2048-sw-None': (8, 16),
720
+ 'max_model_len-4096-sw-None': (8, 16),
721
+ 'max_model_len-8192-sw-None': (8, 16),
722
+ },
723
+ 'q_head-16_kv_head-4_head-128': {
724
+ 'max_model_len-128-sw-None': (1, 32),
725
+ 'max_model_len-256-sw-None': (2, 64),
726
+ 'max_model_len-512-sw-None': (4, 32),
727
+ 'max_model_len-1024-sw-None': (8, 64),
728
+ 'max_model_len-2048-sw-None': (16, 32),
729
+ 'max_model_len-4096-sw-None': (32, 32),
730
+ 'max_model_len-8192-sw-None': (32, 32),
731
+ },
732
+ 'q_head-128_kv_head-2_head-128': {
733
+ 'max_model_len-128-sw-None': (1, 8),
734
+ 'max_model_len-256-sw-None': (2, 8),
735
+ 'max_model_len-512-sw-None': (4, 8),
736
+ 'max_model_len-1024-sw-None': (8, 8),
737
+ 'max_model_len-2048-sw-None': (16, 16),
738
+ 'max_model_len-4096-sw-None': (16, 16),
739
+ 'max_model_len-8192-sw-None': (16, 16),
740
+ },
741
+ 'q_head-64_kv_head-2_head-256': {
742
+ 'max_model_len-128-sw-None': (1, 8),
743
+ 'max_model_len-256-sw-None': (2, 8),
744
+ 'max_model_len-512-sw-None': (4, 8),
745
+ 'max_model_len-1024-sw-None': (8, 8),
746
+ 'max_model_len-2048-sw-None': (16, 16),
747
+ 'max_model_len-4096-sw-None': (32, 8),
748
+ 'max_model_len-8192-sw-None': (32, 16),
749
+ },
750
+ 'q_head-128_kv_head-16_head-128': {
751
+ 'max_model_len-128-sw-None': (1, 8),
752
+ 'max_model_len-256-sw-None': (2, 8),
753
+ 'max_model_len-512-sw-None': (4, 8),
754
+ 'max_model_len-1024-sw-None': (8, 8),
755
+ 'max_model_len-2048-sw-None': (8, 8),
756
+ 'max_model_len-4096-sw-None': (8, 8),
757
+ 'max_model_len-8192-sw-None': (8, 8),
758
+ },
759
+ 'q_head-32_kv_head-4_head-128': {
760
+ 'max_model_len-128-sw-None': (1, 32),
761
+ 'max_model_len-256-sw-None': (2, 32),
762
+ 'max_model_len-512-sw-None': (4, 32),
763
+ 'max_model_len-1024-sw-None': (8, 32),
764
+ 'max_model_len-2048-sw-None': (16, 32),
765
+ 'max_model_len-4096-sw-None': (32, 32),
766
+ 'max_model_len-8192-sw-None': (32, 32),
767
+ },
768
+ 'q_head-64_kv_head-16_head-256': {
769
+ 'max_model_len-128-sw-None': (1, 8),
770
+ 'max_model_len-256-sw-None': (2, 8),
771
+ 'max_model_len-512-sw-None': (4, 8),
772
+ 'max_model_len-1024-sw-None': (4, 8),
773
+ 'max_model_len-2048-sw-None': (4, 8),
774
+ 'max_model_len-4096-sw-None': (4, 8),
775
+ 'max_model_len-8192-sw-None': (4, 8),
776
+ },
777
+ 'q_head-16_kv_head-4_head-256': {
778
+ 'max_model_len-128-sw-None': (1, 32),
779
+ 'max_model_len-256-sw-None': (2, 32),
780
+ 'max_model_len-512-sw-None': (4, 32),
781
+ 'max_model_len-1024-sw-None': (8, 32),
782
+ 'max_model_len-2048-sw-None': (16, 32),
783
+ 'max_model_len-4096-sw-None': (32, 32),
784
+ 'max_model_len-8192-sw-None': (32, 64),
785
+ },
786
+ 'q_head-64_kv_head-32_head-128': {
787
+ 'max_model_len-128-sw-None': (1, 8),
788
+ 'max_model_len-256-sw-None': (2, 8),
789
+ 'max_model_len-512-sw-None': (4, 8),
790
+ 'max_model_len-1024-sw-None': (4, 8),
791
+ 'max_model_len-2048-sw-None': (4, 8),
792
+ 'max_model_len-4096-sw-None': (4, 8),
793
+ 'max_model_len-8192-sw-None': (4, 8),
794
+ },
795
+ 'q_head-128_kv_head-2_head-256': {
796
+ 'max_model_len-128-sw-None': (1, 8),
797
+ 'max_model_len-256-sw-None': (2, 8),
798
+ 'max_model_len-512-sw-None': (4, 8),
799
+ 'max_model_len-1024-sw-None': (8, 8),
800
+ 'max_model_len-2048-sw-None': (16, 8),
801
+ 'max_model_len-4096-sw-None': (16, 8),
802
+ 'max_model_len-8192-sw-None': (16, 16),
803
+ },
804
+ 'q_head-64_kv_head-4_head-128': {
805
+ 'max_model_len-128-sw-None': (1, 16),
806
+ 'max_model_len-256-sw-None': (2, 16),
807
+ 'max_model_len-512-sw-None': (4, 16),
808
+ 'max_model_len-1024-sw-None': (8, 16),
809
+ 'max_model_len-2048-sw-None': (16, 16),
810
+ 'max_model_len-4096-sw-None': (32, 16),
811
+ 'max_model_len-8192-sw-None': (32, 16),
812
+ },
813
+ 'q_head-16_kv_head-1_head-128': {
814
+ 'max_model_len-128-sw-None': (1, 64),
815
+ 'max_model_len-256-sw-None': (2, 64),
816
+ 'max_model_len-512-sw-None': (4, 64),
817
+ 'max_model_len-1024-sw-None': (8, 32),
818
+ 'max_model_len-2048-sw-None': (16, 32),
819
+ 'max_model_len-4096-sw-None': (32, 32),
820
+ 'max_model_len-8192-sw-None': (32, 32),
821
+ },
822
+ 'q_head-32_kv_head-4_head-256': {
823
+ 'max_model_len-128-sw-None': (1, 16),
824
+ 'max_model_len-256-sw-None': (2, 16),
825
+ 'max_model_len-512-sw-None': (4, 16),
826
+ 'max_model_len-1024-sw-None': (8, 32),
827
+ 'max_model_len-2048-sw-None': (16, 32),
828
+ 'max_model_len-4096-sw-None': (32, 16),
829
+ 'max_model_len-8192-sw-None': (32, 32),
830
+ },
831
+ },
832
+ },
833
+ 256: {
834
+ 'q_bfloat16_kv_bfloat16': {
835
+ 'q_head-2_kv_head-1_head-256': {
836
+ 'max_model_len-256-sw-None': (1, 64),
837
+ 'max_model_len-512-sw-None': (2, 128),
838
+ 'max_model_len-1024-sw-None': (4, 128),
839
+ 'max_model_len-2048-sw-None': (8, 64),
840
+ 'max_model_len-4096-sw-None': (16, 64),
841
+ 'max_model_len-8192-sw-None': (16, 128),
842
+ },
843
+ 'q_head-2_kv_head-1_head-128': {
844
+ 'max_model_len-1024-sw-None': (4, 128),
845
+ 'max_model_len-2048-sw-None': (8, 128),
846
+ 'max_model_len-4096-sw-None': (16, 128),
847
+ 'max_model_len-8192-sw-None': (16, 128),
848
+ 'max_model_len-256-sw-None': (1, 128),
849
+ 'max_model_len-512-sw-None': (2, 128),
850
+ },
851
+ 'q_head-8_kv_head-4_head-256': {
852
+ 'max_model_len-512-sw-None': (2, 64),
853
+ 'max_model_len-1024-sw-None': (4, 32),
854
+ 'max_model_len-2048-sw-None': (8, 64),
855
+ 'max_model_len-4096-sw-None': (16, 128),
856
+ 'max_model_len-8192-sw-None': (16, 128),
857
+ 'max_model_len-256-sw-None': (1, 32),
858
+ },
859
+ 'q_head-8_kv_head-4_head-128': {
860
+ 'max_model_len-2048-sw-None': (8, 128),
861
+ 'max_model_len-4096-sw-None': (16, 128),
862
+ 'max_model_len-8192-sw-None': (16, 128),
863
+ 'max_model_len-512-sw-None': (2, 128),
864
+ 'max_model_len-1024-sw-None': (4, 128),
865
+ 'max_model_len-256-sw-None': (1, 128),
866
+ },
867
+ 'q_head-16_kv_head-2_head-256': {
868
+ 'max_model_len-8192-sw-None': (16, 32),
869
+ 'max_model_len-256-sw-None': (1, 32),
870
+ 'max_model_len-512-sw-None': (2, 32),
871
+ 'max_model_len-1024-sw-None': (4, 32),
872
+ 'max_model_len-2048-sw-None': (8, 32),
873
+ 'max_model_len-4096-sw-None': (16, 32),
874
+ },
875
+ 'q_head-32_kv_head-16_head-256': {
876
+ 'max_model_len-2048-sw-None': (2, 16),
877
+ 'max_model_len-4096-sw-None': (2, 16),
878
+ 'max_model_len-8192-sw-None': (2, 16),
879
+ 'max_model_len-256-sw-None': (1, 16),
880
+ 'max_model_len-512-sw-None': (2, 16),
881
+ 'max_model_len-1024-sw-None': (2, 16),
882
+ },
883
+ 'q_head-32_kv_head-16_head-128': {
884
+ 'max_model_len-4096-sw-None': (4, 32),
885
+ 'max_model_len-8192-sw-None': (4, 32),
886
+ 'max_model_len-2048-sw-None': (4, 32),
887
+ 'max_model_len-256-sw-None': (1, 32),
888
+ 'max_model_len-512-sw-None': (2, 32),
889
+ 'max_model_len-1024-sw-None': (4, 32),
890
+ },
891
+ 'q_head-64_kv_head-2_head-128': {
892
+ 'max_model_len-256-sw-None': (1, 16),
893
+ 'max_model_len-512-sw-None': (2, 16),
894
+ 'max_model_len-1024-sw-None': (4, 16),
895
+ 'max_model_len-2048-sw-None': (8, 16),
896
+ 'max_model_len-4096-sw-None': (16, 16),
897
+ 'max_model_len-8192-sw-None': (16, 16),
898
+ },
899
+ 'q_head-128_kv_head-1_head-256': {
900
+ 'max_model_len-1024-sw-None': (4, 8),
901
+ 'max_model_len-2048-sw-None': (8, 8),
902
+ 'max_model_len-4096-sw-None': (16, 8),
903
+ 'max_model_len-8192-sw-None': (16, 8),
904
+ 'max_model_len-256-sw-None': (1, 8),
905
+ 'max_model_len-512-sw-None': (2, 8),
906
+ },
907
+ 'q_head-128_kv_head-8_head-256': {
908
+ 'max_model_len-256-sw-None': (1, 8),
909
+ 'max_model_len-512-sw-None': (2, 8),
910
+ 'max_model_len-1024-sw-None': (4, 8),
911
+ 'max_model_len-2048-sw-None': (4, 8),
912
+ 'max_model_len-4096-sw-None': (4, 8),
913
+ 'max_model_len-8192-sw-None': (4, 8),
914
+ },
915
+ 'q_head-32_kv_head-2_head-256': {
916
+ 'max_model_len-256-sw-None': (1, 16),
917
+ 'max_model_len-512-sw-None': (2, 16),
918
+ 'max_model_len-1024-sw-None': (4, 16),
919
+ 'max_model_len-2048-sw-None': (8, 32),
920
+ 'max_model_len-4096-sw-None': (16, 32),
921
+ 'max_model_len-8192-sw-None': (16, 32),
922
+ },
923
+ 'q_head-64_kv_head-16_head-128': {
924
+ 'max_model_len-256-sw-None': (1, 16),
925
+ 'max_model_len-512-sw-None': (2, 16),
926
+ 'max_model_len-1024-sw-None': (4, 16),
927
+ 'max_model_len-2048-sw-None': (4, 16),
928
+ 'max_model_len-4096-sw-None': (4, 16),
929
+ 'max_model_len-8192-sw-None': (4, 16),
930
+ },
931
+ 'q_head-16_kv_head-4_head-128': {
932
+ 'max_model_len-256-sw-None': (1, 32),
933
+ 'max_model_len-512-sw-None': (2, 32),
934
+ 'max_model_len-1024-sw-None': (4, 32),
935
+ 'max_model_len-2048-sw-None': (8, 64),
936
+ 'max_model_len-4096-sw-None': (16, 32),
937
+ 'max_model_len-8192-sw-None': (16, 32),
938
+ },
939
+ 'q_head-4_kv_head-1_head-128': {
940
+ 'max_model_len-256-sw-None': (1, 128),
941
+ 'max_model_len-512-sw-None': (2, 128),
942
+ 'max_model_len-1024-sw-None': (4, 128),
943
+ 'max_model_len-2048-sw-None': (8, 128),
944
+ 'max_model_len-4096-sw-None': (16, 128),
945
+ 'max_model_len-8192-sw-None': (16, 128),
946
+ },
947
+ 'q_head-128_kv_head-2_head-128': {
948
+ 'max_model_len-256-sw-None': (1, 8),
949
+ 'max_model_len-512-sw-None': (2, 8),
950
+ 'max_model_len-1024-sw-None': (4, 8),
951
+ 'max_model_len-2048-sw-None': (8, 16),
952
+ 'max_model_len-4096-sw-None': (16, 8),
953
+ 'max_model_len-8192-sw-None': (16, 8),
954
+ },
955
+ 'q_head-64_kv_head-2_head-256': {
956
+ 'max_model_len-256-sw-None': (1, 16),
957
+ 'max_model_len-512-sw-None': (2, 8),
958
+ 'max_model_len-1024-sw-None': (4, 16),
959
+ 'max_model_len-2048-sw-None': (8, 16),
960
+ 'max_model_len-4096-sw-None': (16, 8),
961
+ 'max_model_len-8192-sw-None': (16, 8),
962
+ },
963
+ 'q_head-128_kv_head-16_head-128': {
964
+ 'max_model_len-256-sw-None': (1, 8),
965
+ 'max_model_len-512-sw-None': (2, 8),
966
+ 'max_model_len-1024-sw-None': (4, 8),
967
+ 'max_model_len-2048-sw-None': (4, 8),
968
+ 'max_model_len-4096-sw-None': (4, 8),
969
+ 'max_model_len-8192-sw-None': (4, 8),
970
+ },
971
+ 'q_head-8_kv_head-2_head-256': {
972
+ 'max_model_len-2048-sw-None': (8, 32),
973
+ 'max_model_len-4096-sw-None': (16, 32),
974
+ 'max_model_len-8192-sw-None': (16, 32),
975
+ 'max_model_len-256-sw-None': (1, 32),
976
+ 'max_model_len-512-sw-None': (2, 32),
977
+ 'max_model_len-1024-sw-None': (4, 32),
978
+ },
979
+ 'q_head-16_kv_head-2_head-128': {
980
+ 'max_model_len-8192-sw-None': (16, 32),
981
+ 'max_model_len-256-sw-None': (1, 32),
982
+ 'max_model_len-512-sw-None': (2, 32),
983
+ 'max_model_len-1024-sw-None': (4, 32),
984
+ 'max_model_len-2048-sw-None': (8, 32),
985
+ 'max_model_len-4096-sw-None': (16, 32),
986
+ },
987
+ 'q_head-32_kv_head-8_head-256': {
988
+ 'max_model_len-4096-sw-None': (4, 32),
989
+ 'max_model_len-8192-sw-None': (4, 32),
990
+ 'max_model_len-256-sw-None': (1, 32),
991
+ 'max_model_len-512-sw-None': (2, 32),
992
+ 'max_model_len-1024-sw-None': (4, 32),
993
+ 'max_model_len-2048-sw-None': (4, 32),
994
+ },
995
+ 'q_head-64_kv_head-1_head-256': {
996
+ 'max_model_len-256-sw-None': (1, 8),
997
+ 'max_model_len-512-sw-None': (2, 8),
998
+ 'max_model_len-1024-sw-None': (4, 8),
999
+ 'max_model_len-2048-sw-None': (8, 8),
1000
+ 'max_model_len-4096-sw-None': (16, 8),
1001
+ 'max_model_len-8192-sw-None': (16, 16),
1002
+ },
1003
+ 'q_head-128_kv_head-1_head-128': {
1004
+ 'max_model_len-1024-sw-None': (4, 8),
1005
+ 'max_model_len-2048-sw-None': (8, 8),
1006
+ 'max_model_len-4096-sw-None': (16, 8),
1007
+ 'max_model_len-256-sw-None': (1, 8),
1008
+ 'max_model_len-8192-sw-None': (16, 8),
1009
+ 'max_model_len-512-sw-None': (2, 8),
1010
+ },
1011
+ 'q_head-128_kv_head-8_head-128': {
1012
+ 'max_model_len-256-sw-None': (1, 8),
1013
+ 'max_model_len-512-sw-None': (2, 16),
1014
+ 'max_model_len-1024-sw-None': (4, 16),
1015
+ 'max_model_len-2048-sw-None': (8, 16),
1016
+ 'max_model_len-4096-sw-None': (8, 16),
1017
+ 'max_model_len-8192-sw-None': (8, 16),
1018
+ },
1019
+ 'q_head-8_kv_head-1_head-128': {
1020
+ 'max_model_len-256-sw-None': (1, 64),
1021
+ 'max_model_len-512-sw-None': (2, 32),
1022
+ 'max_model_len-1024-sw-None': (4, 32),
1023
+ 'max_model_len-2048-sw-None': (8, 32),
1024
+ 'max_model_len-4096-sw-None': (16, 32),
1025
+ 'max_model_len-8192-sw-None': (16, 32),
1026
+ },
1027
+ 'q_head-32_kv_head-4_head-128': {
1028
+ 'max_model_len-256-sw-None': (1, 32),
1029
+ 'max_model_len-512-sw-None': (2, 32),
1030
+ 'max_model_len-1024-sw-None': (4, 32),
1031
+ 'max_model_len-2048-sw-None': (8, 32),
1032
+ 'max_model_len-4096-sw-None': (16, 32),
1033
+ 'max_model_len-8192-sw-None': (16, 32),
1034
+ },
1035
+ 'q_head-64_kv_head-16_head-256': {
1036
+ 'max_model_len-256-sw-None': (1, 8),
1037
+ 'max_model_len-512-sw-None': (2, 8),
1038
+ 'max_model_len-1024-sw-None': (2, 8),
1039
+ 'max_model_len-2048-sw-None': (2, 8),
1040
+ 'max_model_len-4096-sw-None': (2, 8),
1041
+ 'max_model_len-8192-sw-None': (2, 8),
1042
+ },
1043
+ 'q_head-16_kv_head-4_head-256': {
1044
+ 'max_model_len-256-sw-None': (1, 32),
1045
+ 'max_model_len-512-sw-None': (2, 32),
1046
+ 'max_model_len-1024-sw-None': (4, 32),
1047
+ 'max_model_len-2048-sw-None': (8, 32),
1048
+ 'max_model_len-4096-sw-None': (16, 32),
1049
+ 'max_model_len-8192-sw-None': (16, 64),
1050
+ },
1051
+ 'q_head-4_kv_head-1_head-256': {
1052
+ 'max_model_len-256-sw-None': (1, 32),
1053
+ 'max_model_len-512-sw-None': (2, 32),
1054
+ 'max_model_len-1024-sw-None': (4, 32),
1055
+ 'max_model_len-2048-sw-None': (8, 32),
1056
+ 'max_model_len-4096-sw-None': (16, 32),
1057
+ 'max_model_len-8192-sw-None': (16, 32),
1058
+ },
1059
+ 'q_head-64_kv_head-32_head-128': {
1060
+ 'max_model_len-256-sw-None': (1, 8),
1061
+ 'max_model_len-512-sw-None': (2, 8),
1062
+ 'max_model_len-1024-sw-None': (2, 8),
1063
+ 'max_model_len-2048-sw-None': (2, 8),
1064
+ 'max_model_len-4096-sw-None': (2, 8),
1065
+ 'max_model_len-8192-sw-None': (2, 8),
1066
+ },
1067
+ 'q_head-16_kv_head-8_head-128': {
1068
+ 'max_model_len-256-sw-None': (1, 64),
1069
+ 'max_model_len-512-sw-None': (2, 64),
1070
+ 'max_model_len-1024-sw-None': (4, 32),
1071
+ 'max_model_len-2048-sw-None': (8, 64),
1072
+ 'max_model_len-4096-sw-None': (16, 64),
1073
+ 'max_model_len-8192-sw-None': (16, 64),
1074
+ },
1075
+ 'q_head-128_kv_head-2_head-256': {
1076
+ 'max_model_len-256-sw-None': (1, 8),
1077
+ 'max_model_len-512-sw-None': (2, 8),
1078
+ 'max_model_len-1024-sw-None': (4, 8),
1079
+ 'max_model_len-2048-sw-None': (8, 8),
1080
+ 'max_model_len-4096-sw-None': (16, 8),
1081
+ 'max_model_len-8192-sw-None': (16, 8),
1082
+ },
1083
+ 'q_head-4_kv_head-2_head-128': {
1084
+ 'max_model_len-256-sw-None': (1, 128),
1085
+ 'max_model_len-512-sw-None': (2, 128),
1086
+ 'max_model_len-1024-sw-None': (4, 128),
1087
+ 'max_model_len-2048-sw-None': (8, 64),
1088
+ 'max_model_len-4096-sw-None': (16, 128),
1089
+ 'max_model_len-8192-sw-None': (16, 128),
1090
+ },
1091
+ 'q_head-64_kv_head-4_head-128': {
1092
+ 'max_model_len-256-sw-None': (1, 16),
1093
+ 'max_model_len-512-sw-None': (2, 16),
1094
+ 'max_model_len-1024-sw-None': (4, 16),
1095
+ 'max_model_len-2048-sw-None': (8, 32),
1096
+ 'max_model_len-4096-sw-None': (16, 16),
1097
+ 'max_model_len-8192-sw-None': (16, 16),
1098
+ },
1099
+ 'q_head-16_kv_head-1_head-128': {
1100
+ 'max_model_len-256-sw-None': (1, 64),
1101
+ 'max_model_len-512-sw-None': (2, 32),
1102
+ 'max_model_len-1024-sw-None': (4, 32),
1103
+ 'max_model_len-2048-sw-None': (8, 32),
1104
+ 'max_model_len-4096-sw-None': (16, 32),
1105
+ 'max_model_len-8192-sw-None': (16, 32),
1106
+ },
1107
+ 'q_head-32_kv_head-4_head-256': {
1108
+ 'max_model_len-256-sw-None': (1, 16),
1109
+ 'max_model_len-512-sw-None': (2, 16),
1110
+ 'max_model_len-1024-sw-None': (4, 32),
1111
+ 'max_model_len-2048-sw-None': (8, 32),
1112
+ 'max_model_len-4096-sw-None': (16, 16),
1113
+ 'max_model_len-8192-sw-None': (16, 32),
1114
+ },
1115
+ 'q_head-8_kv_head-1_head-256': {
1116
+ 'max_model_len-256-sw-None': (1, 32),
1117
+ 'max_model_len-512-sw-None': (2, 32),
1118
+ 'max_model_len-1024-sw-None': (4, 32),
1119
+ 'max_model_len-2048-sw-None': (8, 32),
1120
+ 'max_model_len-4096-sw-None': (16, 32),
1121
+ 'max_model_len-8192-sw-None': (16, 32),
1122
+ },
1123
+ 'q_head-16_kv_head-8_head-256': {
1124
+ 'max_model_len-256-sw-None': (1, 32),
1125
+ 'max_model_len-512-sw-None': (2, 32),
1126
+ 'max_model_len-1024-sw-None': (4, 32),
1127
+ 'max_model_len-2048-sw-None': (8, 32),
1128
+ 'max_model_len-4096-sw-None': (8, 64),
1129
+ 'max_model_len-8192-sw-None': (8, 64),
1130
+ },
1131
+ 'q_head-4_kv_head-2_head-256': {
1132
+ 'max_model_len-256-sw-None': (1, 64),
1133
+ 'max_model_len-512-sw-None': (2, 64),
1134
+ 'max_model_len-1024-sw-None': (4, 64),
1135
+ 'max_model_len-2048-sw-None': (8, 64),
1136
+ 'max_model_len-4096-sw-None': (16, 128),
1137
+ 'max_model_len-8192-sw-None': (16, 128),
1138
+ },
1139
+ 'q_head-128_kv_head-4_head-128': {
1140
+ 'max_model_len-256-sw-None': (1, 8),
1141
+ 'max_model_len-512-sw-None': (2, 8),
1142
+ 'max_model_len-1024-sw-None': (4, 8),
1143
+ 'max_model_len-2048-sw-None': (8, 16),
1144
+ 'max_model_len-4096-sw-None': (8, 16),
1145
+ 'max_model_len-8192-sw-None': (8, 16),
1146
+ },
1147
+ 'q_head-32_kv_head-1_head-128': {
1148
+ 'max_model_len-256-sw-None': (1, 32),
1149
+ 'max_model_len-512-sw-None': (2, 32),
1150
+ 'max_model_len-1024-sw-None': (4, 32),
1151
+ 'max_model_len-2048-sw-None': (8, 32),
1152
+ 'max_model_len-4096-sw-None': (16, 32),
1153
+ 'max_model_len-8192-sw-None': (16, 32),
1154
+ },
1155
+ 'q_head-64_kv_head-4_head-256': {
1156
+ 'max_model_len-256-sw-None': (1, 16),
1157
+ 'max_model_len-512-sw-None': (2, 16),
1158
+ 'max_model_len-1024-sw-None': (4, 16),
1159
+ 'max_model_len-2048-sw-None': (8, 16),
1160
+ 'max_model_len-4096-sw-None': (16, 16),
1161
+ 'max_model_len-8192-sw-None': (16, 16),
1162
+ },
1163
+ 'q_head-16_kv_head-1_head-256': {
1164
+ 'max_model_len-256-sw-None': (1, 32),
1165
+ 'max_model_len-512-sw-None': (2, 32),
1166
+ 'max_model_len-1024-sw-None': (4, 32),
1167
+ 'max_model_len-2048-sw-None': (8, 32),
1168
+ 'max_model_len-4096-sw-None': (16, 32),
1169
+ 'max_model_len-8192-sw-None': (16, 32),
1170
+ },
1171
+ 'q_head-32_kv_head-8_head-128': {
1172
+ 'max_model_len-256-sw-None': (1, 32),
1173
+ 'max_model_len-512-sw-None': (2, 32),
1174
+ 'max_model_len-1024-sw-None': (4, 32),
1175
+ 'max_model_len-2048-sw-None': (8, 32),
1176
+ 'max_model_len-4096-sw-None': (16, 32),
1177
+ 'max_model_len-8192-sw-None': (16, 32),
1178
+ },
1179
+ 'q_head-8_kv_head-2_head-128': {
1180
+ 'max_model_len-256-sw-None': (1, 64),
1181
+ 'max_model_len-512-sw-None': (2, 64),
1182
+ 'max_model_len-1024-sw-None': (4, 64),
1183
+ 'max_model_len-2048-sw-None': (8, 32),
1184
+ 'max_model_len-4096-sw-None': (16, 32),
1185
+ 'max_model_len-8192-sw-None': (16, 32),
1186
+ },
1187
+ 'q_head-32_kv_head-2_head-128': {
1188
+ 'max_model_len-256-sw-None': (1, 32),
1189
+ 'max_model_len-512-sw-None': (2, 32),
1190
+ 'max_model_len-1024-sw-None': (4, 32),
1191
+ 'max_model_len-2048-sw-None': (8, 32),
1192
+ 'max_model_len-4096-sw-None': (16, 32),
1193
+ 'max_model_len-8192-sw-None': (16, 32),
1194
+ },
1195
+ 'q_head-64_kv_head-1_head-128': {
1196
+ 'max_model_len-256-sw-None': (1, 16),
1197
+ 'max_model_len-512-sw-None': (2, 16),
1198
+ 'max_model_len-1024-sw-None': (4, 16),
1199
+ 'max_model_len-2048-sw-None': (8, 16),
1200
+ 'max_model_len-4096-sw-None': (16, 16),
1201
+ 'max_model_len-8192-sw-None': (16, 16),
1202
+ },
1203
+ 'q_head-128_kv_head-4_head-256': {
1204
+ 'max_model_len-256-sw-None': (1, 8),
1205
+ 'max_model_len-512-sw-None': (2, 8),
1206
+ 'max_model_len-1024-sw-None': (4, 8),
1207
+ 'max_model_len-2048-sw-None': (8, 8),
1208
+ 'max_model_len-4096-sw-None': (16, 8),
1209
+ 'max_model_len-8192-sw-None': (16, 8),
1210
+ },
1211
+ 'q_head-32_kv_head-1_head-256': {
1212
+ 'max_model_len-256-sw-None': (1, 16),
1213
+ 'max_model_len-512-sw-None': (2, 16),
1214
+ 'max_model_len-1024-sw-None': (4, 16),
1215
+ 'max_model_len-2048-sw-None': (8, 16),
1216
+ 'max_model_len-4096-sw-None': (16, 16),
1217
+ 'max_model_len-8192-sw-None': (16, 16),
1218
+ },
1219
+ 'q_head-64_kv_head-8_head-128': {
1220
+ 'max_model_len-256-sw-None': (1, 16),
1221
+ 'max_model_len-512-sw-None': (2, 16),
1222
+ 'max_model_len-1024-sw-None': (4, 32),
1223
+ 'max_model_len-2048-sw-None': (8, 32),
1224
+ 'max_model_len-4096-sw-None': (8, 32),
1225
+ 'max_model_len-8192-sw-None': (8, 32),
1226
+ },
1227
+ 'q_head-64_kv_head-8_head-256': {
1228
+ 'max_model_len-256-sw-None': (1, 16),
1229
+ 'max_model_len-512-sw-None': (2, 16),
1230
+ 'max_model_len-1024-sw-None': (4, 16),
1231
+ 'max_model_len-2048-sw-None': (4, 16),
1232
+ 'max_model_len-4096-sw-None': (4, 16),
1233
+ 'max_model_len-8192-sw-None': (4, 16),
1234
+ },
1235
+ },
1236
+ 'q_bfloat16_kv_float8_e4m3fn': {
1237
+ 'q_head-2_kv_head-2_head-128': {
1238
+ 'max_model_len-4096-sw-None': (16, 32),
1239
+ 'max_model_len-8192-sw-None': (16, 64),
1240
+ 'max_model_len-256-sw-None': (1, 128),
1241
+ 'max_model_len-512-sw-None': (2, 128),
1242
+ 'max_model_len-1024-sw-None': (4, 128),
1243
+ 'max_model_len-2048-sw-None': (8, 64),
1244
+ },
1245
+ 'q_head-8_kv_head-4_head-128': {
1246
+ 'max_model_len-256-sw-None': (1, 64),
1247
+ 'max_model_len-512-sw-None': (2, 64),
1248
+ 'max_model_len-1024-sw-None': (4, 64),
1249
+ 'max_model_len-2048-sw-None': (8, 32),
1250
+ 'max_model_len-4096-sw-None': (16, 128),
1251
+ 'max_model_len-8192-sw-None': (16, 128),
1252
+ },
1253
+ 'q_head-16_kv_head-2_head-256': {
1254
+ 'max_model_len-1024-sw-None': (4, 32),
1255
+ 'max_model_len-2048-sw-None': (8, 32),
1256
+ 'max_model_len-4096-sw-None': (16, 32),
1257
+ 'max_model_len-8192-sw-None': (16, 32),
1258
+ 'max_model_len-256-sw-None': (1, 32),
1259
+ 'max_model_len-512-sw-None': (2, 32),
1260
+ },
1261
+ 'q_head-32_kv_head-2_head-128': {
1262
+ 'max_model_len-4096-sw-None': (16, 32),
1263
+ 'max_model_len-8192-sw-None': (16, 32),
1264
+ 'max_model_len-256-sw-None': (1, 32),
1265
+ 'max_model_len-512-sw-None': (2, 32),
1266
+ 'max_model_len-1024-sw-None': (4, 32),
1267
+ 'max_model_len-2048-sw-None': (8, 32),
1268
+ },
1269
+ 'q_head-32_kv_head-16_head-128': {
1270
+ 'max_model_len-512-sw-None': (2, 32),
1271
+ 'max_model_len-1024-sw-None': (4, 32),
1272
+ 'max_model_len-2048-sw-None': (8, 32),
1273
+ 'max_model_len-4096-sw-None': (16, 16),
1274
+ 'max_model_len-8192-sw-None': (16, 16),
1275
+ 'max_model_len-256-sw-None': (1, 32),
1276
+ },
1277
+ 'q_head-64_kv_head-8_head-256': {
1278
+ 'max_model_len-2048-sw-None': (8, 16),
1279
+ 'max_model_len-4096-sw-None': (8, 16),
1280
+ 'max_model_len-8192-sw-None': (8, 16),
1281
+ 'max_model_len-256-sw-None': (1, 16),
1282
+ 'max_model_len-512-sw-None': (2, 16),
1283
+ 'max_model_len-1024-sw-None': (4, 16),
1284
+ },
1285
+ 'q_head-128_kv_head-2_head-256': {
1286
+ 'max_model_len-512-sw-None': (2, 8),
1287
+ 'max_model_len-1024-sw-None': (4, 8),
1288
+ 'max_model_len-2048-sw-None': (8, 8),
1289
+ 'max_model_len-4096-sw-None': (16, 8),
1290
+ 'max_model_len-8192-sw-None': (16, 8),
1291
+ 'max_model_len-256-sw-None': (1, 8),
1292
+ },
1293
+ 'q_head-128_kv_head-8_head-128': {
1294
+ 'max_model_len-4096-sw-None': (8, 16),
1295
+ 'max_model_len-8192-sw-None': (8, 16),
1296
+ 'max_model_len-256-sw-None': (1, 8),
1297
+ 'max_model_len-512-sw-None': (2, 16),
1298
+ 'max_model_len-1024-sw-None': (4, 16),
1299
+ 'max_model_len-2048-sw-None': (8, 16),
1300
+ },
1301
+ 'q_head-32_kv_head-16_head-256': {
1302
+ 'max_model_len-256-sw-None': (1, 16),
1303
+ 'max_model_len-512-sw-None': (2, 16),
1304
+ 'max_model_len-1024-sw-None': (4, 16),
1305
+ 'max_model_len-2048-sw-None': (4, 16),
1306
+ 'max_model_len-4096-sw-None': (4, 16),
1307
+ 'max_model_len-8192-sw-None': (4, 16),
1308
+ },
1309
+ 'q_head-64_kv_head-2_head-128': {
1310
+ 'max_model_len-256-sw-None': (1, 16),
1311
+ 'max_model_len-512-sw-None': (2, 16),
1312
+ 'max_model_len-1024-sw-None': (4, 16),
1313
+ 'max_model_len-2048-sw-None': (8, 16),
1314
+ 'max_model_len-4096-sw-None': (16, 16),
1315
+ 'max_model_len-8192-sw-None': (16, 16),
1316
+ },
1317
+ 'q_head-8_kv_head-4_head-256': {
1318
+ 'max_model_len-256-sw-None': (1, 32),
1319
+ 'max_model_len-512-sw-None': (2, 32),
1320
+ 'max_model_len-1024-sw-None': (4, 32),
1321
+ 'max_model_len-2048-sw-None': (8, 32),
1322
+ 'max_model_len-4096-sw-None': (16, 64),
1323
+ 'max_model_len-8192-sw-None': (16, 64),
1324
+ },
1325
+ 'q_head-2_kv_head-2_head-256': {
1326
+ 'max_model_len-256-sw-None': (1, 64),
1327
+ 'max_model_len-512-sw-None': (2, 32),
1328
+ 'max_model_len-1024-sw-None': (4, 64),
1329
+ 'max_model_len-2048-sw-None': (8, 64),
1330
+ 'max_model_len-4096-sw-None': (16, 32),
1331
+ 'max_model_len-8192-sw-None': (16, 32),
1332
+ },
1333
+ 'q_head-128_kv_head-8_head-256': {
1334
+ 'max_model_len-256-sw-None': (1, 8),
1335
+ 'max_model_len-512-sw-None': (2, 8),
1336
+ 'max_model_len-1024-sw-None': (4, 8),
1337
+ 'max_model_len-2048-sw-None': (8, 8),
1338
+ 'max_model_len-4096-sw-None': (8, 8),
1339
+ 'max_model_len-8192-sw-None': (8, 8),
1340
+ },
1341
+ 'q_head-32_kv_head-2_head-256': {
1342
+ 'max_model_len-256-sw-None': (1, 16),
1343
+ 'max_model_len-512-sw-None': (2, 16),
1344
+ 'max_model_len-1024-sw-None': (4, 16),
1345
+ 'max_model_len-2048-sw-None': (8, 16),
1346
+ 'max_model_len-4096-sw-None': (16, 16),
1347
+ 'max_model_len-8192-sw-None': (16, 32),
1348
+ },
1349
+ 'q_head-64_kv_head-16_head-128': {
1350
+ 'max_model_len-256-sw-None': (1, 16),
1351
+ 'max_model_len-512-sw-None': (2, 16),
1352
+ 'max_model_len-1024-sw-None': (4, 16),
1353
+ 'max_model_len-2048-sw-None': (8, 16),
1354
+ 'max_model_len-4096-sw-None': (16, 8),
1355
+ 'max_model_len-8192-sw-None': (16, 8),
1356
+ },
1357
+ 'q_head-4_kv_head-2_head-128': {
1358
+ 'max_model_len-256-sw-None': (1, 128),
1359
+ 'max_model_len-512-sw-None': (2, 128),
1360
+ 'max_model_len-1024-sw-None': (4, 64),
1361
+ 'max_model_len-2048-sw-None': (8, 128),
1362
+ 'max_model_len-4096-sw-None': (16, 128),
1363
+ 'max_model_len-8192-sw-None': (16, 128),
1364
+ },
1365
+ 'q_head-16_kv_head-4_head-128': {
1366
+ 'max_model_len-256-sw-None': (1, 32),
1367
+ 'max_model_len-512-sw-None': (2, 32),
1368
+ 'max_model_len-1024-sw-None': (4, 32),
1369
+ 'max_model_len-2048-sw-None': (8, 32),
1370
+ 'max_model_len-4096-sw-None': (16, 32),
1371
+ 'max_model_len-8192-sw-None': (16, 32),
1372
+ },
1373
+ 'q_head-128_kv_head-2_head-128': {
1374
+ 'max_model_len-256-sw-None': (1, 8),
1375
+ 'max_model_len-512-sw-None': (2, 8),
1376
+ 'max_model_len-1024-sw-None': (4, 8),
1377
+ 'max_model_len-2048-sw-None': (8, 8),
1378
+ 'max_model_len-4096-sw-None': (8, 16),
1379
+ 'max_model_len-8192-sw-None': (8, 16),
1380
+ },
1381
+ 'q_head-8_kv_head-2_head-256': {
1382
+ 'max_model_len-256-sw-None': (1, 32),
1383
+ 'max_model_len-512-sw-None': (2, 32),
1384
+ 'max_model_len-1024-sw-None': (4, 32),
1385
+ 'max_model_len-2048-sw-None': (8, 32),
1386
+ 'max_model_len-4096-sw-None': (16, 32),
1387
+ 'max_model_len-8192-sw-None': (16, 32),
1388
+ },
1389
+ 'q_head-16_kv_head-2_head-128': {
1390
+ 'max_model_len-1024-sw-None': (4, 32),
1391
+ 'max_model_len-2048-sw-None': (8, 32),
1392
+ 'max_model_len-256-sw-None': (1, 64),
1393
+ 'max_model_len-512-sw-None': (2, 32),
1394
+ 'max_model_len-4096-sw-None': (16, 32),
1395
+ 'max_model_len-8192-sw-None': (16, 32),
1396
+ },
1397
+ 'q_head-32_kv_head-8_head-256': {
1398
+ 'max_model_len-512-sw-None': (2, 16),
1399
+ 'max_model_len-1024-sw-None': (4, 32),
1400
+ 'max_model_len-2048-sw-None': (8, 32),
1401
+ 'max_model_len-4096-sw-None': (8, 32),
1402
+ 'max_model_len-256-sw-None': (1, 16),
1403
+ 'max_model_len-8192-sw-None': (8, 32),
1404
+ },
1405
+ 'q_head-64_kv_head-8_head-128': {
1406
+ 'max_model_len-2048-sw-None': (8, 32),
1407
+ 'max_model_len-4096-sw-None': (16, 16),
1408
+ 'max_model_len-8192-sw-None': (16, 16),
1409
+ 'max_model_len-256-sw-None': (1, 16),
1410
+ 'max_model_len-512-sw-None': (2, 16),
1411
+ 'max_model_len-1024-sw-None': (4, 32),
1412
+ },
1413
+ 'q_head-128_kv_head-4_head-256': {
1414
+ 'max_model_len-4096-sw-None': (16, 8),
1415
+ 'max_model_len-8192-sw-None': (16, 8),
1416
+ 'max_model_len-256-sw-None': (1, 8),
1417
+ 'max_model_len-512-sw-None': (2, 8),
1418
+ 'max_model_len-1024-sw-None': (4, 8),
1419
+ 'max_model_len-2048-sw-None': (8, 8),
1420
+ },
1421
+ 'q_head-64_kv_head-2_head-256': {
1422
+ 'max_model_len-256-sw-None': (1, 8),
1423
+ 'max_model_len-512-sw-None': (2, 8),
1424
+ 'max_model_len-1024-sw-None': (4, 8),
1425
+ 'max_model_len-2048-sw-None': (8, 8),
1426
+ 'max_model_len-4096-sw-None': (16, 8),
1427
+ 'max_model_len-8192-sw-None': (16, 8),
1428
+ },
1429
+ 'q_head-32_kv_head-4_head-128': {
1430
+ 'max_model_len-256-sw-None': (1, 32),
1431
+ 'max_model_len-512-sw-None': (2, 32),
1432
+ 'max_model_len-1024-sw-None': (4, 32),
1433
+ 'max_model_len-2048-sw-None': (8, 32),
1434
+ 'max_model_len-4096-sw-None': (16, 32),
1435
+ 'max_model_len-8192-sw-None': (16, 32),
1436
+ },
1437
+ 'q_head-128_kv_head-16_head-128': {
1438
+ 'max_model_len-256-sw-None': (1, 8),
1439
+ 'max_model_len-512-sw-None': (2, 8),
1440
+ 'max_model_len-1024-sw-None': (4, 8),
1441
+ 'max_model_len-2048-sw-None': (8, 8),
1442
+ 'max_model_len-4096-sw-None': (8, 8),
1443
+ 'max_model_len-8192-sw-None': (8, 8),
1444
+ },
1445
+ 'q_head-8_kv_head-2_head-128': {
1446
+ 'max_model_len-256-sw-None': (1, 128),
1447
+ 'max_model_len-512-sw-None': (2, 64),
1448
+ 'max_model_len-1024-sw-None': (4, 32),
1449
+ 'max_model_len-2048-sw-None': (8, 32),
1450
+ 'max_model_len-4096-sw-None': (16, 32),
1451
+ 'max_model_len-8192-sw-None': (16, 32),
1452
+ },
1453
+ 'q_head-64_kv_head-16_head-256': {
1454
+ 'max_model_len-256-sw-None': (1, 8),
1455
+ 'max_model_len-512-sw-None': (2, 8),
1456
+ 'max_model_len-1024-sw-None': (4, 8),
1457
+ 'max_model_len-2048-sw-None': (4, 8),
1458
+ 'max_model_len-4096-sw-None': (4, 8),
1459
+ 'max_model_len-8192-sw-None': (4, 8),
1460
+ },
1461
+ 'q_head-16_kv_head-4_head-256': {
1462
+ 'max_model_len-256-sw-None': (1, 32),
1463
+ 'max_model_len-512-sw-None': (2, 32),
1464
+ 'max_model_len-1024-sw-None': (4, 32),
1465
+ 'max_model_len-2048-sw-None': (8, 32),
1466
+ 'max_model_len-4096-sw-None': (16, 32),
1467
+ 'max_model_len-8192-sw-None': (16, 32),
1468
+ },
1469
+ 'q_head-64_kv_head-32_head-128': {
1470
+ 'max_model_len-256-sw-None': (1, 8),
1471
+ 'max_model_len-512-sw-None': (2, 8),
1472
+ 'max_model_len-1024-sw-None': (4, 8),
1473
+ 'max_model_len-2048-sw-None': (4, 8),
1474
+ 'max_model_len-4096-sw-None': (4, 8),
1475
+ 'max_model_len-8192-sw-None': (4, 8),
1476
+ },
1477
+ 'q_head-4_kv_head-2_head-256': {
1478
+ 'max_model_len-256-sw-None': (1, 64),
1479
+ 'max_model_len-512-sw-None': (2, 64),
1480
+ 'max_model_len-1024-sw-None': (4, 64),
1481
+ 'max_model_len-2048-sw-None': (8, 32),
1482
+ 'max_model_len-4096-sw-None': (16, 32),
1483
+ 'max_model_len-8192-sw-None': (16, 64),
1484
+ },
1485
+ 'q_head-16_kv_head-8_head-128': {
1486
+ 'max_model_len-256-sw-None': (1, 32),
1487
+ 'max_model_len-512-sw-None': (2, 64),
1488
+ 'max_model_len-1024-sw-None': (4, 32),
1489
+ 'max_model_len-2048-sw-None': (8, 32),
1490
+ 'max_model_len-4096-sw-None': (16, 32),
1491
+ 'max_model_len-8192-sw-None': (16, 64),
1492
+ },
1493
+ 'q_head-64_kv_head-4_head-128': {
1494
+ 'max_model_len-256-sw-None': (1, 16),
1495
+ 'max_model_len-512-sw-None': (2, 16),
1496
+ 'max_model_len-1024-sw-None': (4, 16),
1497
+ 'max_model_len-2048-sw-None': (8, 16),
1498
+ 'max_model_len-4096-sw-None': (16, 16),
1499
+ 'max_model_len-8192-sw-None': (16, 32),
1500
+ },
1501
+ 'q_head-32_kv_head-4_head-256': {
1502
+ 'max_model_len-256-sw-None': (1, 16),
1503
+ 'max_model_len-512-sw-None': (2, 16),
1504
+ 'max_model_len-1024-sw-None': (4, 16),
1505
+ 'max_model_len-2048-sw-None': (8, 32),
1506
+ 'max_model_len-4096-sw-None': (16, 32),
1507
+ 'max_model_len-8192-sw-None': (16, 32),
1508
+ },
1509
+ 'q_head-16_kv_head-8_head-256': {
1510
+ 'max_model_len-256-sw-None': (1, 32),
1511
+ 'max_model_len-512-sw-None': (2, 32),
1512
+ 'max_model_len-1024-sw-None': (4, 32),
1513
+ 'max_model_len-2048-sw-None': (8, 32),
1514
+ 'max_model_len-4096-sw-None': (16, 32),
1515
+ 'max_model_len-8192-sw-None': (16, 32),
1516
+ },
1517
+ 'q_head-128_kv_head-4_head-128': {
1518
+ 'max_model_len-256-sw-None': (1, 8),
1519
+ 'max_model_len-512-sw-None': (2, 8),
1520
+ 'max_model_len-1024-sw-None': (4, 8),
1521
+ 'max_model_len-2048-sw-None': (8, 8),
1522
+ 'max_model_len-4096-sw-None': (8, 16),
1523
+ 'max_model_len-8192-sw-None': (16, 16),
1524
+ },
1525
+ 'q_head-64_kv_head-4_head-256': {
1526
+ 'max_model_len-256-sw-None': (1, 8),
1527
+ 'max_model_len-512-sw-None': (2, 8),
1528
+ 'max_model_len-1024-sw-None': (4, 8),
1529
+ 'max_model_len-2048-sw-None': (8, 16),
1530
+ 'max_model_len-4096-sw-None': (16, 8),
1531
+ 'max_model_len-8192-sw-None': (16, 16),
1532
+ },
1533
+ 'q_head-32_kv_head-8_head-128': {
1534
+ 'max_model_len-256-sw-None': (1, 32),
1535
+ 'max_model_len-512-sw-None': (2, 32),
1536
+ 'max_model_len-1024-sw-None': (4, 32),
1537
+ 'max_model_len-2048-sw-None': (8, 32),
1538
+ 'max_model_len-4096-sw-None': (16, 32),
1539
+ 'max_model_len-8192-sw-None': (16, 32),
1540
+ },
1541
+ },
1542
+ },
1543
+ },
1544
+ 'TPU v6e': {
1545
+ 128: {
1546
+ 'q_bfloat16_kv_float8_e4m3fn': {
1547
+ 'q_head-8_kv_head-4_head-256': {
1548
+ 'max_model_len-2048-sw-None': (16, 32),
1549
+ 'max_model_len-4096-sw-None': (32, 128),
1550
+ 'max_model_len-128-sw-None': (1, 32),
1551
+ 'max_model_len-8192-sw-None': (32, 128),
1552
+ 'max_model_len-256-sw-None': (2, 32),
1553
+ 'max_model_len-512-sw-None': (4, 32),
1554
+ 'max_model_len-1024-sw-None': (8, 64),
1555
+ },
1556
+ 'q_head-16_kv_head-4_head-128': {
1557
+ 'max_model_len-256-sw-None': (2, 32),
1558
+ 'max_model_len-512-sw-None': (4, 32),
1559
+ 'max_model_len-1024-sw-None': (8, 32),
1560
+ 'max_model_len-2048-sw-None': (16, 32),
1561
+ 'max_model_len-4096-sw-None': (32, 32),
1562
+ 'max_model_len-8192-sw-None': (32, 32),
1563
+ 'max_model_len-128-sw-None': (1, 32),
1564
+ },
1565
+ 'q_head-32_kv_head-16_head-256': {
1566
+ 'max_model_len-4096-sw-None': (8, 16),
1567
+ 'max_model_len-8192-sw-None': (8, 16),
1568
+ 'max_model_len-128-sw-None': (1, 16),
1569
+ 'max_model_len-256-sw-None': (2, 16),
1570
+ 'max_model_len-512-sw-None': (4, 16),
1571
+ 'max_model_len-1024-sw-None': (8, 16),
1572
+ 'max_model_len-2048-sw-None': (8, 16),
1573
+ },
1574
+ 'q_head-32_kv_head-2_head-256': {
1575
+ 'max_model_len-1024-sw-None': (8, 16),
1576
+ 'max_model_len-2048-sw-None': (16, 16),
1577
+ 'max_model_len-4096-sw-None': (32, 32),
1578
+ 'max_model_len-8192-sw-None': (32, 32),
1579
+ 'max_model_len-128-sw-None': (1, 16),
1580
+ 'max_model_len-256-sw-None': (2, 16),
1581
+ 'max_model_len-512-sw-None': (4, 16),
1582
+ },
1583
+ 'q_head-64_kv_head-2_head-128': {
1584
+ 'max_model_len-4096-sw-None': (32, 16),
1585
+ 'max_model_len-8192-sw-None': (32, 16),
1586
+ 'max_model_len-128-sw-None': (1, 16),
1587
+ 'max_model_len-256-sw-None': (2, 16),
1588
+ 'max_model_len-512-sw-None': (4, 16),
1589
+ 'max_model_len-1024-sw-None': (8, 16),
1590
+ 'max_model_len-2048-sw-None': (16, 16),
1591
+ },
1592
+ 'q_head-64_kv_head-16_head-128': {
1593
+ 'max_model_len-256-sw-None': (2, 16),
1594
+ 'max_model_len-512-sw-None': (4, 16),
1595
+ 'max_model_len-1024-sw-None': (8, 16),
1596
+ 'max_model_len-2048-sw-None': (16, 16),
1597
+ 'max_model_len-4096-sw-None': (16, 16),
1598
+ 'max_model_len-8192-sw-None': (16, 16),
1599
+ 'max_model_len-128-sw-None': (1, 16),
1600
+ },
1601
+ 'q_head-128_kv_head-8_head-256': {
1602
+ 'max_model_len-1024-sw-None': (8, 8),
1603
+ 'max_model_len-2048-sw-None': (16, 8),
1604
+ 'max_model_len-4096-sw-None': (16, 8),
1605
+ 'max_model_len-8192-sw-None': (16, 8),
1606
+ 'max_model_len-128-sw-None': (1, 8),
1607
+ 'max_model_len-256-sw-None': (2, 8),
1608
+ 'max_model_len-512-sw-None': (4, 8),
1609
+ },
1610
+ 'q_head-4_kv_head-2_head-256': {
1611
+ 'max_model_len-1024-sw-None': (8, 64),
1612
+ 'max_model_len-2048-sw-None': (16, 64),
1613
+ 'max_model_len-4096-sw-None': (32, 128),
1614
+ 'max_model_len-8192-sw-None': (32, 128),
1615
+ 'max_model_len-256-sw-None': (2, 32),
1616
+ 'max_model_len-128-sw-None': (1, 32),
1617
+ 'max_model_len-512-sw-None': (4, 32),
1618
+ },
1619
+ 'q_head-128_kv_head-2_head-128': {
1620
+ 'max_model_len-128-sw-None': (1, 8),
1621
+ 'max_model_len-256-sw-None': (2, 8),
1622
+ 'max_model_len-512-sw-None': (4, 8),
1623
+ 'max_model_len-1024-sw-None': (8, 8),
1624
+ 'max_model_len-2048-sw-None': (16, 16),
1625
+ 'max_model_len-4096-sw-None': (16, 16),
1626
+ 'max_model_len-8192-sw-None': (16, 16),
1627
+ },
1628
+ 'q_head-64_kv_head-2_head-256': {
1629
+ 'max_model_len-128-sw-None': (1, 8),
1630
+ 'max_model_len-256-sw-None': (2, 8),
1631
+ 'max_model_len-512-sw-None': (4, 8),
1632
+ 'max_model_len-1024-sw-None': (8, 8),
1633
+ 'max_model_len-2048-sw-None': (16, 16),
1634
+ 'max_model_len-4096-sw-None': (32, 8),
1635
+ 'max_model_len-8192-sw-None': (32, 16),
1636
+ },
1637
+ 'q_head-128_kv_head-16_head-128': {
1638
+ 'max_model_len-128-sw-None': (1, 8),
1639
+ 'max_model_len-256-sw-None': (2, 8),
1640
+ 'max_model_len-512-sw-None': (4, 8),
1641
+ 'max_model_len-1024-sw-None': (8, 8),
1642
+ 'max_model_len-2048-sw-None': (16, 8),
1643
+ 'max_model_len-4096-sw-None': (16, 8),
1644
+ 'max_model_len-8192-sw-None': (16, 8),
1645
+ },
1646
+ 'q_head-32_kv_head-4_head-128': {
1647
+ 'max_model_len-128-sw-None': (1, 32),
1648
+ 'max_model_len-256-sw-None': (2, 32),
1649
+ 'max_model_len-512-sw-None': (4, 32),
1650
+ 'max_model_len-1024-sw-None': (8, 32),
1651
+ 'max_model_len-2048-sw-None': (16, 32),
1652
+ 'max_model_len-4096-sw-None': (32, 32),
1653
+ 'max_model_len-8192-sw-None': (32, 32),
1654
+ },
1655
+ 'q_head-8_kv_head-4_head-128': {
1656
+ 'max_model_len-128-sw-None': (1, 64),
1657
+ 'max_model_len-256-sw-None': (2, 64),
1658
+ 'max_model_len-512-sw-None': (4, 64),
1659
+ 'max_model_len-1024-sw-None': (8, 64),
1660
+ 'max_model_len-2048-sw-None': (16, 64),
1661
+ 'max_model_len-4096-sw-None': (32, 128),
1662
+ 'max_model_len-8192-sw-None': (32, 128),
1663
+ },
1664
+ 'q_head-8_kv_head-2_head-128': {
1665
+ 'max_model_len-128-sw-None': (1, 32),
1666
+ 'max_model_len-256-sw-None': (2, 32),
1667
+ 'max_model_len-512-sw-None': (4, 32),
1668
+ 'max_model_len-1024-sw-None': (8, 32),
1669
+ 'max_model_len-2048-sw-None': (16, 32),
1670
+ 'max_model_len-4096-sw-None': (32, 32),
1671
+ 'max_model_len-8192-sw-None': (32, 32),
1672
+ },
1673
+ 'q_head-64_kv_head-16_head-256': {
1674
+ 'max_model_len-128-sw-None': (1, 8),
1675
+ 'max_model_len-256-sw-None': (2, 8),
1676
+ 'max_model_len-512-sw-None': (4, 8),
1677
+ 'max_model_len-1024-sw-None': (8, 8),
1678
+ 'max_model_len-2048-sw-None': (8, 8),
1679
+ 'max_model_len-4096-sw-None': (8, 8),
1680
+ 'max_model_len-8192-sw-None': (8, 8),
1681
+ },
1682
+ 'q_head-2_kv_head-2_head-128': {
1683
+ 'max_model_len-128-sw-None': (1, 64),
1684
+ 'max_model_len-256-sw-None': (2, 128),
1685
+ 'max_model_len-512-sw-None': (4, 64),
1686
+ 'max_model_len-1024-sw-None': (8, 64),
1687
+ 'max_model_len-2048-sw-None': (16, 128),
1688
+ 'max_model_len-4096-sw-None': (32, 128),
1689
+ 'max_model_len-8192-sw-None': (32, 128),
1690
+ },
1691
+ 'q_head-16_kv_head-4_head-256': {
1692
+ 'max_model_len-128-sw-None': (1, 16),
1693
+ 'max_model_len-256-sw-None': (2, 32),
1694
+ 'max_model_len-512-sw-None': (4, 32),
1695
+ 'max_model_len-1024-sw-None': (8, 32),
1696
+ 'max_model_len-2048-sw-None': (16, 32),
1697
+ 'max_model_len-4096-sw-None': (32, 32),
1698
+ 'max_model_len-8192-sw-None': (32, 32),
1699
+ },
1700
+ 'q_head-64_kv_head-32_head-128': {
1701
+ 'max_model_len-128-sw-None': (1, 8),
1702
+ 'max_model_len-256-sw-None': (2, 8),
1703
+ 'max_model_len-512-sw-None': (4, 8),
1704
+ 'max_model_len-1024-sw-None': (8, 8),
1705
+ 'max_model_len-2048-sw-None': (8, 8),
1706
+ 'max_model_len-4096-sw-None': (8, 8),
1707
+ 'max_model_len-8192-sw-None': (8, 8),
1708
+ },
1709
+ 'q_head-16_kv_head-2_head-256': {
1710
+ 'max_model_len-256-sw-None': (2, 32),
1711
+ 'max_model_len-512-sw-None': (4, 32),
1712
+ 'max_model_len-128-sw-None': (1, 16),
1713
+ 'max_model_len-1024-sw-None': (8, 32),
1714
+ 'max_model_len-2048-sw-None': (16, 32),
1715
+ 'max_model_len-4096-sw-None': (32, 32),
1716
+ 'max_model_len-8192-sw-None': (32, 32),
1717
+ },
1718
+ 'q_head-32_kv_head-16_head-128': {
1719
+ 'max_model_len-4096-sw-None': (16, 32),
1720
+ 'max_model_len-8192-sw-None': (16, 32),
1721
+ 'max_model_len-128-sw-None': (1, 32),
1722
+ 'max_model_len-256-sw-None': (2, 32),
1723
+ 'max_model_len-512-sw-None': (4, 32),
1724
+ 'max_model_len-1024-sw-None': (8, 32),
1725
+ 'max_model_len-2048-sw-None': (16, 32),
1726
+ },
1727
+ 'q_head-32_kv_head-2_head-128': {
1728
+ 'max_model_len-1024-sw-None': (8, 32),
1729
+ 'max_model_len-2048-sw-None': (16, 32),
1730
+ 'max_model_len-128-sw-None': (1, 32),
1731
+ 'max_model_len-256-sw-None': (2, 32),
1732
+ 'max_model_len-512-sw-None': (4, 32),
1733
+ 'max_model_len-4096-sw-None': (32, 32),
1734
+ 'max_model_len-8192-sw-None': (32, 32),
1735
+ },
1736
+ 'q_head-64_kv_head-8_head-256': {
1737
+ 'max_model_len-256-sw-None': (2, 16),
1738
+ 'max_model_len-512-sw-None': (4, 16),
1739
+ 'max_model_len-1024-sw-None': (8, 16),
1740
+ 'max_model_len-2048-sw-None': (16, 16),
1741
+ 'max_model_len-128-sw-None': (1, 16),
1742
+ 'max_model_len-4096-sw-None': (16, 16),
1743
+ 'max_model_len-8192-sw-None': (16, 16),
1744
+ },
1745
+ 'q_head-128_kv_head-8_head-128': {
1746
+ 'max_model_len-2048-sw-None': (16, 16),
1747
+ 'max_model_len-4096-sw-None': (32, 8),
1748
+ 'max_model_len-8192-sw-None': (16, 16),
1749
+ 'max_model_len-128-sw-None': (1, 8),
1750
+ 'max_model_len-256-sw-None': (2, 8),
1751
+ 'max_model_len-512-sw-None': (4, 16),
1752
+ 'max_model_len-1024-sw-None': (8, 16),
1753
+ },
1754
+ 'q_head-128_kv_head-2_head-256': {
1755
+ 'max_model_len-128-sw-None': (1, 8),
1756
+ 'max_model_len-256-sw-None': (2, 8),
1757
+ 'max_model_len-512-sw-None': (4, 8),
1758
+ 'max_model_len-1024-sw-None': (8, 8),
1759
+ 'max_model_len-2048-sw-None': (16, 8),
1760
+ 'max_model_len-4096-sw-None': (32, 8),
1761
+ 'max_model_len-8192-sw-None': (32, 8),
1762
+ },
1763
+ 'q_head-16_kv_head-8_head-128': {
1764
+ 'max_model_len-128-sw-None': (1, 64),
1765
+ 'max_model_len-256-sw-None': (2, 32),
1766
+ 'max_model_len-512-sw-None': (4, 32),
1767
+ 'max_model_len-1024-sw-None': (8, 32),
1768
+ 'max_model_len-2048-sw-None': (16, 64),
1769
+ 'max_model_len-4096-sw-None': (32, 64),
1770
+ 'max_model_len-8192-sw-None': (32, 64),
1771
+ },
1772
+ 'q_head-64_kv_head-4_head-128': {
1773
+ 'max_model_len-128-sw-None': (1, 16),
1774
+ 'max_model_len-256-sw-None': (2, 16),
1775
+ 'max_model_len-512-sw-None': (4, 16),
1776
+ 'max_model_len-1024-sw-None': (8, 16),
1777
+ 'max_model_len-2048-sw-None': (16, 16),
1778
+ 'max_model_len-4096-sw-None': (32, 16),
1779
+ 'max_model_len-8192-sw-None': (32, 16),
1780
+ },
1781
+ 'q_head-16_kv_head-2_head-128': {
1782
+ 'max_model_len-128-sw-None': (1, 32),
1783
+ 'max_model_len-256-sw-None': (2, 32),
1784
+ 'max_model_len-512-sw-None': (4, 32),
1785
+ 'max_model_len-1024-sw-None': (8, 32),
1786
+ 'max_model_len-2048-sw-None': (16, 32),
1787
+ 'max_model_len-4096-sw-None': (32, 32),
1788
+ 'max_model_len-8192-sw-None': (32, 32),
1789
+ },
1790
+ 'q_head-32_kv_head-4_head-256': {
1791
+ 'max_model_len-128-sw-None': (1, 16),
1792
+ 'max_model_len-256-sw-None': (2, 16),
1793
+ 'max_model_len-512-sw-None': (4, 16),
1794
+ 'max_model_len-1024-sw-None': (8, 32),
1795
+ 'max_model_len-2048-sw-None': (16, 32),
1796
+ 'max_model_len-4096-sw-None': (16, 32),
1797
+ 'max_model_len-8192-sw-None': (16, 32),
1798
+ },
1799
+ 'q_head-8_kv_head-2_head-256': {
1800
+ 'max_model_len-128-sw-None': (1, 32),
1801
+ 'max_model_len-256-sw-None': (2, 32),
1802
+ 'max_model_len-512-sw-None': (4, 32),
1803
+ 'max_model_len-1024-sw-None': (8, 32),
1804
+ 'max_model_len-2048-sw-None': (16, 32),
1805
+ 'max_model_len-4096-sw-None': (32, 128),
1806
+ 'max_model_len-8192-sw-None': (32, 128),
1807
+ },
1808
+ 'q_head-2_kv_head-2_head-256': {
1809
+ 'max_model_len-128-sw-None': (1, 32),
1810
+ 'max_model_len-256-sw-None': (2, 32),
1811
+ 'max_model_len-512-sw-None': (4, 32),
1812
+ 'max_model_len-1024-sw-None': (8, 32),
1813
+ 'max_model_len-2048-sw-None': (16, 64),
1814
+ 'max_model_len-4096-sw-None': (32, 128),
1815
+ 'max_model_len-8192-sw-None': (32, 128),
1816
+ },
1817
+ 'q_head-128_kv_head-4_head-128': {
1818
+ 'max_model_len-128-sw-None': (1, 8),
1819
+ 'max_model_len-256-sw-None': (2, 8),
1820
+ 'max_model_len-512-sw-None': (4, 8),
1821
+ 'max_model_len-1024-sw-None': (8, 8),
1822
+ 'max_model_len-2048-sw-None': (16, 16),
1823
+ 'max_model_len-4096-sw-None': (32, 8),
1824
+ 'max_model_len-8192-sw-None': (32, 8),
1825
+ },
1826
+ 'q_head-4_kv_head-2_head-128': {
1827
+ 'max_model_len-1024-sw-None': (8, 64),
1828
+ 'max_model_len-128-sw-None': (1, 64),
1829
+ 'max_model_len-256-sw-None': (2, 128),
1830
+ 'max_model_len-512-sw-None': (4, 128),
1831
+ 'max_model_len-2048-sw-None': (16, 128),
1832
+ 'max_model_len-4096-sw-None': (32, 128),
1833
+ 'max_model_len-8192-sw-None': (32, 128),
1834
+ },
1835
+ 'q_head-16_kv_head-8_head-256': {
1836
+ 'max_model_len-128-sw-None': (1, 32),
1837
+ 'max_model_len-256-sw-None': (2, 32),
1838
+ 'max_model_len-512-sw-None': (4, 32),
1839
+ 'max_model_len-1024-sw-None': (8, 32),
1840
+ 'max_model_len-2048-sw-None': (16, 64),
1841
+ 'max_model_len-4096-sw-None': (16, 64),
1842
+ 'max_model_len-8192-sw-None': (16, 64),
1843
+ },
1844
+ 'q_head-64_kv_head-4_head-256': {
1845
+ 'max_model_len-128-sw-None': (1, 8),
1846
+ 'max_model_len-256-sw-None': (2, 8),
1847
+ 'max_model_len-512-sw-None': (4, 8),
1848
+ 'max_model_len-1024-sw-None': (8, 16),
1849
+ 'max_model_len-2048-sw-None': (16, 16),
1850
+ 'max_model_len-4096-sw-None': (16, 16),
1851
+ 'max_model_len-8192-sw-None': (16, 16),
1852
+ },
1853
+ 'q_head-32_kv_head-8_head-128': {
1854
+ 'max_model_len-128-sw-None': (1, 32),
1855
+ 'max_model_len-256-sw-None': (2, 32),
1856
+ 'max_model_len-512-sw-None': (4, 32),
1857
+ 'max_model_len-1024-sw-None': (8, 32),
1858
+ 'max_model_len-2048-sw-None': (16, 32),
1859
+ 'max_model_len-4096-sw-None': (32, 32),
1860
+ 'max_model_len-8192-sw-None': (32, 32),
1861
+ },
1862
+ 'q_head-128_kv_head-4_head-256': {
1863
+ 'max_model_len-128-sw-None': (1, 8),
1864
+ 'max_model_len-256-sw-None': (2, 8),
1865
+ 'max_model_len-512-sw-None': (4, 8),
1866
+ 'max_model_len-1024-sw-None': (8, 8),
1867
+ 'max_model_len-2048-sw-None': (16, 16),
1868
+ 'max_model_len-4096-sw-None': (16, 16),
1869
+ 'max_model_len-8192-sw-None': (16, 16),
1870
+ },
1871
+ 'q_head-64_kv_head-8_head-128': {
1872
+ 'max_model_len-128-sw-None': (1, 16),
1873
+ 'max_model_len-256-sw-None': (2, 16),
1874
+ 'max_model_len-512-sw-None': (4, 16),
1875
+ 'max_model_len-1024-sw-None': (8, 16),
1876
+ 'max_model_len-2048-sw-None': (16, 32),
1877
+ 'max_model_len-4096-sw-None': (32, 16),
1878
+ 'max_model_len-8192-sw-None': (32, 16),
1879
+ },
1880
+ 'q_head-32_kv_head-8_head-256': {
1881
+ 'max_model_len-128-sw-None': (1, 16),
1882
+ 'max_model_len-256-sw-None': (2, 16),
1883
+ 'max_model_len-512-sw-None': (4, 16),
1884
+ 'max_model_len-1024-sw-None': (8, 32),
1885
+ 'max_model_len-2048-sw-None': (16, 32),
1886
+ 'max_model_len-4096-sw-None': (16, 32),
1887
+ 'max_model_len-8192-sw-None': (16, 32),
1888
+ },
1889
+ },
1890
+ 'q_bfloat16_kv_bfloat16': {
1891
+ 'q_head-8_kv_head-2_head-128': {
1892
+ 'max_model_len-8192-sw-None': (32, 128),
1893
+ 'max_model_len-128-sw-None': (1, 64),
1894
+ 'max_model_len-512-sw-None': (4, 64),
1895
+ 'max_model_len-256-sw-None': (2, 32),
1896
+ 'max_model_len-1024-sw-None': (8, 64),
1897
+ 'max_model_len-2048-sw-None': (16, 32),
1898
+ 'max_model_len-4096-sw-None': (32, 64),
1899
+ },
1900
+ 'q_head-16_kv_head-2_head-128': {
1901
+ 'max_model_len-128-sw-None': (1, 32),
1902
+ 'max_model_len-256-sw-None': (2, 32),
1903
+ 'max_model_len-512-sw-None': (4, 32),
1904
+ 'max_model_len-1024-sw-None': (8, 32),
1905
+ 'max_model_len-2048-sw-None': (16, 32),
1906
+ 'max_model_len-4096-sw-None': (32, 32),
1907
+ 'max_model_len-8192-sw-None': (32, 32),
1908
+ },
1909
+ 'q_head-16_kv_head-8_head-256': {
1910
+ 'max_model_len-8192-sw-None': (8, 64),
1911
+ 'max_model_len-2048-sw-None': (8, 64),
1912
+ 'max_model_len-4096-sw-None': (8, 64),
1913
+ 'max_model_len-128-sw-None': (1, 32),
1914
+ 'max_model_len-256-sw-None': (2, 32),
1915
+ 'max_model_len-512-sw-None': (4, 32),
1916
+ 'max_model_len-1024-sw-None': (8, 64),
1917
+ },
1918
+ 'q_head-32_kv_head-1_head-256': {
1919
+ 'max_model_len-1024-sw-None': (8, 16),
1920
+ 'max_model_len-2048-sw-None': (16, 16),
1921
+ 'max_model_len-4096-sw-None': (32, 32),
1922
+ 'max_model_len-8192-sw-None': (32, 32),
1923
+ 'max_model_len-128-sw-None': (1, 16),
1924
+ 'max_model_len-256-sw-None': (2, 16),
1925
+ 'max_model_len-512-sw-None': (4, 16),
1926
+ },
1927
+ 'q_head-32_kv_head-8_head-256': {
1928
+ 'max_model_len-128-sw-None': (1, 16),
1929
+ 'max_model_len-256-sw-None': (2, 32),
1930
+ 'max_model_len-512-sw-None': (4, 32),
1931
+ 'max_model_len-1024-sw-None': (8, 32),
1932
+ 'max_model_len-2048-sw-None': (8, 32),
1933
+ 'max_model_len-4096-sw-None': (8, 32),
1934
+ 'max_model_len-8192-sw-None': (8, 32),
1935
+ },
1936
+ 'q_head-64_kv_head-1_head-128': {
1937
+ 'max_model_len-4096-sw-None': (32, 16),
1938
+ 'max_model_len-8192-sw-None': (32, 32),
1939
+ 'max_model_len-128-sw-None': (1, 16),
1940
+ 'max_model_len-256-sw-None': (2, 16),
1941
+ 'max_model_len-512-sw-None': (4, 16),
1942
+ 'max_model_len-1024-sw-None': (8, 16),
1943
+ 'max_model_len-2048-sw-None': (16, 16),
1944
+ },
1945
+ 'q_head-64_kv_head-8_head-128': {
1946
+ 'max_model_len-512-sw-None': (4, 32),
1947
+ 'max_model_len-1024-sw-None': (8, 32),
1948
+ 'max_model_len-2048-sw-None': (16, 32),
1949
+ 'max_model_len-4096-sw-None': (16, 32),
1950
+ 'max_model_len-8192-sw-None': (16, 32),
1951
+ 'max_model_len-128-sw-None': (1, 32),
1952
+ 'max_model_len-256-sw-None': (2, 16),
1953
+ },
1954
+ 'q_head-128_kv_head-4_head-256': {
1955
+ 'max_model_len-2048-sw-None': (16, 16),
1956
+ 'max_model_len-4096-sw-None': (16, 16),
1957
+ 'max_model_len-8192-sw-None': (16, 16),
1958
+ 'max_model_len-128-sw-None': (1, 8),
1959
+ 'max_model_len-256-sw-None': (2, 8),
1960
+ 'max_model_len-512-sw-None': (4, 8),
1961
+ 'max_model_len-1024-sw-None': (8, 16),
1962
+ },
1963
+ 'q_head-128_kv_head-1_head-128': {
1964
+ 'max_model_len-128-sw-None': (1, 8),
1965
+ 'max_model_len-256-sw-None': (2, 8),
1966
+ 'max_model_len-512-sw-None': (4, 8),
1967
+ 'max_model_len-1024-sw-None': (8, 8),
1968
+ 'max_model_len-2048-sw-None': (16, 8),
1969
+ 'max_model_len-4096-sw-None': (32, 16),
1970
+ 'max_model_len-8192-sw-None': (32, 16),
1971
+ },
1972
+ 'q_head-8_kv_head-2_head-256': {
1973
+ 'max_model_len-128-sw-None': (1, 32),
1974
+ 'max_model_len-256-sw-None': (2, 32),
1975
+ 'max_model_len-512-sw-None': (4, 32),
1976
+ 'max_model_len-1024-sw-None': (8, 32),
1977
+ 'max_model_len-2048-sw-None': (16, 128),
1978
+ 'max_model_len-4096-sw-None': (16, 128),
1979
+ 'max_model_len-8192-sw-None': (16, 128),
1980
+ },
1981
+ 'q_head-32_kv_head-16_head-128': {
1982
+ 'max_model_len-128-sw-None': (1, 32),
1983
+ 'max_model_len-256-sw-None': (2, 32),
1984
+ 'max_model_len-512-sw-None': (4, 32),
1985
+ 'max_model_len-1024-sw-None': (8, 32),
1986
+ 'max_model_len-2048-sw-None': (8, 32),
1987
+ 'max_model_len-4096-sw-None': (8, 32),
1988
+ 'max_model_len-8192-sw-None': (8, 32),
1989
+ },
1990
+ 'q_head-2_kv_head-1_head-256': {
1991
+ 'max_model_len-128-sw-None': (1, 64),
1992
+ 'max_model_len-512-sw-None': (4, 128),
1993
+ 'max_model_len-4096-sw-None': (32, 128),
1994
+ 'max_model_len-256-sw-None': (2, 64),
1995
+ 'max_model_len-1024-sw-None': (8, 128),
1996
+ 'max_model_len-8192-sw-None': (32, 128),
1997
+ 'max_model_len-2048-sw-None': (16, 128),
1998
+ },
1999
+ 'q_head-2_kv_head-1_head-128': {
2000
+ 'max_model_len-512-sw-None': (4, 128),
2001
+ 'max_model_len-256-sw-None': (2, 128),
2002
+ 'max_model_len-2048-sw-None': (16, 128),
2003
+ 'max_model_len-4096-sw-None': (32, 128),
2004
+ 'max_model_len-128-sw-None': (1, 64),
2005
+ 'max_model_len-8192-sw-None': (32, 128),
2006
+ 'max_model_len-1024-sw-None': (8, 128),
2007
+ },
2008
+ 'q_head-8_kv_head-1_head-256': {
2009
+ 'max_model_len-512-sw-None': (4, 32),
2010
+ 'max_model_len-1024-sw-None': (8, 32),
2011
+ 'max_model_len-256-sw-None': (2, 32),
2012
+ 'max_model_len-2048-sw-None': (16, 32),
2013
+ 'max_model_len-128-sw-None': (1, 32),
2014
+ 'max_model_len-4096-sw-None': (32, 128),
2015
+ 'max_model_len-8192-sw-None': (32, 32),
2016
+ },
2017
+ 'q_head-64_kv_head-1_head-256': {
2018
+ 'max_model_len-128-sw-None': (1, 8),
2019
+ 'max_model_len-256-sw-None': (2, 8),
2020
+ 'max_model_len-512-sw-None': (4, 8),
2021
+ 'max_model_len-1024-sw-None': (8, 8),
2022
+ 'max_model_len-2048-sw-None': (16, 16),
2023
+ 'max_model_len-4096-sw-None': (32, 16),
2024
+ 'max_model_len-8192-sw-None': (32, 16),
2025
+ },
2026
+ 'q_head-4_kv_head-2_head-128': {
2027
+ 'max_model_len-8192-sw-None': (32, 128),
2028
+ 'max_model_len-128-sw-None': (1, 128),
2029
+ 'max_model_len-512-sw-None': (4, 128),
2030
+ 'max_model_len-4096-sw-None': (32, 128),
2031
+ 'max_model_len-256-sw-None': (2, 64),
2032
+ 'max_model_len-1024-sw-None': (8, 128),
2033
+ 'max_model_len-2048-sw-None': (16, 128),
2034
+ },
2035
+ 'q_head-4_kv_head-2_head-256': {
2036
+ 'max_model_len-256-sw-None': (2, 32),
2037
+ 'max_model_len-1024-sw-None': (8, 128),
2038
+ 'max_model_len-8192-sw-None': (16, 128),
2039
+ 'max_model_len-512-sw-None': (4, 64),
2040
+ 'max_model_len-2048-sw-None': (16, 128),
2041
+ 'max_model_len-128-sw-None': (1, 32),
2042
+ 'max_model_len-4096-sw-None': (16, 128),
2043
+ },
2044
+ 'q_head-8_kv_head-4_head-128': {
2045
+ 'max_model_len-512-sw-None': (4, 128),
2046
+ 'max_model_len-128-sw-None': (1, 64),
2047
+ 'max_model_len-256-sw-None': (2, 64),
2048
+ 'max_model_len-1024-sw-None': (8, 64),
2049
+ 'max_model_len-2048-sw-None': (16, 128),
2050
+ 'max_model_len-4096-sw-None': (16, 128),
2051
+ 'max_model_len-8192-sw-None': (32, 128),
2052
+ },
2053
+ 'q_head-8_kv_head-4_head-256': {
2054
+ 'max_model_len-1024-sw-None': (8, 128),
2055
+ 'max_model_len-2048-sw-None': (16, 128),
2056
+ 'max_model_len-128-sw-None': (1, 32),
2057
+ 'max_model_len-256-sw-None': (2, 32),
2058
+ 'max_model_len-512-sw-None': (4, 32),
2059
+ 'max_model_len-4096-sw-None': (16, 128),
2060
+ 'max_model_len-8192-sw-None': (16, 128),
2061
+ },
2062
+ 'q_head-8_kv_head-1_head-128': {
2063
+ 'max_model_len-256-sw-None': (2, 64),
2064
+ 'max_model_len-8192-sw-None': (32, 32),
2065
+ 'max_model_len-512-sw-None': (4, 32),
2066
+ 'max_model_len-128-sw-None': (1, 32),
2067
+ 'max_model_len-1024-sw-None': (8, 32),
2068
+ 'max_model_len-2048-sw-None': (16, 32),
2069
+ 'max_model_len-4096-sw-None': (32, 32),
2070
+ },
2071
+ 'q_head-32_kv_head-2_head-128': {
2072
+ 'max_model_len-128-sw-None': (1, 32),
2073
+ 'max_model_len-256-sw-None': (2, 32),
2074
+ 'max_model_len-512-sw-None': (4, 32),
2075
+ 'max_model_len-1024-sw-None': (8, 32),
2076
+ 'max_model_len-2048-sw-None': (16, 32),
2077
+ 'max_model_len-4096-sw-None': (32, 32),
2078
+ 'max_model_len-8192-sw-None': (32, 32),
2079
+ },
2080
+ 'q_head-128_kv_head-8_head-128': {
2081
+ 'max_model_len-128-sw-None': (1, 16),
2082
+ 'max_model_len-256-sw-None': (2, 8),
2083
+ 'max_model_len-512-sw-None': (4, 16),
2084
+ 'max_model_len-1024-sw-None': (8, 16),
2085
+ 'max_model_len-2048-sw-None': (16, 16),
2086
+ 'max_model_len-4096-sw-None': (16, 16),
2087
+ 'max_model_len-8192-sw-None': (16, 16),
2088
+ },
2089
+ 'q_head-64_kv_head-8_head-256': {
2090
+ 'max_model_len-128-sw-None': (1, 16),
2091
+ 'max_model_len-256-sw-None': (2, 16),
2092
+ 'max_model_len-512-sw-None': (4, 16),
2093
+ 'max_model_len-1024-sw-None': (8, 16),
2094
+ 'max_model_len-2048-sw-None': (8, 16),
2095
+ 'max_model_len-4096-sw-None': (8, 16),
2096
+ 'max_model_len-8192-sw-None': (8, 16),
2097
+ },
2098
+ 'q_head-16_kv_head-2_head-256': {
2099
+ 'max_model_len-128-sw-None': (1, 32),
2100
+ 'max_model_len-256-sw-None': (2, 32),
2101
+ 'max_model_len-512-sw-None': (4, 32),
2102
+ 'max_model_len-1024-sw-None': (8, 32),
2103
+ 'max_model_len-2048-sw-None': (16, 32),
2104
+ 'max_model_len-4096-sw-None': (16, 32),
2105
+ 'max_model_len-8192-sw-None': (16, 32),
2106
+ },
2107
+ 'q_head-4_kv_head-1_head-128': {
2108
+ 'max_model_len-1024-sw-None': (8, 128),
2109
+ 'max_model_len-8192-sw-None': (32, 128),
2110
+ 'max_model_len-2048-sw-None': (16, 128),
2111
+ 'max_model_len-128-sw-None': (1, 128),
2112
+ 'max_model_len-4096-sw-None': (32, 128),
2113
+ 'max_model_len-256-sw-None': (2, 128),
2114
+ 'max_model_len-512-sw-None': (4, 128),
2115
+ },
2116
+ 'q_head-16_kv_head-1_head-256': {
2117
+ 'max_model_len-256-sw-None': (2, 32),
2118
+ 'max_model_len-512-sw-None': (4, 32),
2119
+ 'max_model_len-1024-sw-None': (8, 32),
2120
+ 'max_model_len-2048-sw-None': (16, 32),
2121
+ 'max_model_len-128-sw-None': (1, 32),
2122
+ 'max_model_len-4096-sw-None': (32, 32),
2123
+ 'max_model_len-8192-sw-None': (32, 32),
2124
+ },
2125
+ 'q_head-16_kv_head-8_head-128': {
2126
+ 'max_model_len-8192-sw-None': (16, 128),
2127
+ 'max_model_len-128-sw-None': (1, 32),
2128
+ 'max_model_len-256-sw-None': (2, 64),
2129
+ 'max_model_len-512-sw-None': (4, 64),
2130
+ 'max_model_len-1024-sw-None': (8, 64),
2131
+ 'max_model_len-2048-sw-None': (16, 128),
2132
+ 'max_model_len-4096-sw-None': (16, 128),
2133
+ },
2134
+ 'q_head-32_kv_head-1_head-128': {
2135
+ 'max_model_len-1024-sw-None': (8, 32),
2136
+ 'max_model_len-2048-sw-None': (16, 32),
2137
+ 'max_model_len-4096-sw-None': (32, 32),
2138
+ 'max_model_len-128-sw-None': (1, 32),
2139
+ 'max_model_len-256-sw-None': (2, 32),
2140
+ 'max_model_len-8192-sw-None': (32, 32),
2141
+ 'max_model_len-512-sw-None': (4, 32),
2142
+ },
2143
+ 'q_head-32_kv_head-8_head-128': {
2144
+ 'max_model_len-128-sw-None': (1, 32),
2145
+ 'max_model_len-256-sw-None': (2, 32),
2146
+ 'max_model_len-512-sw-None': (4, 32),
2147
+ 'max_model_len-1024-sw-None': (8, 32),
2148
+ 'max_model_len-2048-sw-None': (16, 64),
2149
+ 'max_model_len-4096-sw-None': (16, 64),
2150
+ 'max_model_len-8192-sw-None': (16, 64),
2151
+ },
2152
+ 'q_head-64_kv_head-4_head-256': {
2153
+ 'max_model_len-512-sw-None': (4, 16),
2154
+ 'max_model_len-1024-sw-None': (8, 16),
2155
+ 'max_model_len-128-sw-None': (1, 8),
2156
+ 'max_model_len-2048-sw-None': (16, 16),
2157
+ 'max_model_len-256-sw-None': (2, 16),
2158
+ 'max_model_len-4096-sw-None': (8, 32),
2159
+ 'max_model_len-8192-sw-None': (8, 32),
2160
+ },
2161
+ 'q_head-128_kv_head-4_head-128': {
2162
+ 'max_model_len-2048-sw-None': (16, 16),
2163
+ 'max_model_len-4096-sw-None': (16, 16),
2164
+ 'max_model_len-128-sw-None': (1, 8),
2165
+ 'max_model_len-256-sw-None': (2, 8),
2166
+ 'max_model_len-512-sw-None': (4, 16),
2167
+ 'max_model_len-8192-sw-None': (16, 16),
2168
+ 'max_model_len-1024-sw-None': (8, 16),
2169
+ },
2170
+ 'q_head-4_kv_head-1_head-256': {
2171
+ 'max_model_len-2048-sw-None': (16, 64),
2172
+ 'max_model_len-128-sw-None': (1, 32),
2173
+ 'max_model_len-4096-sw-None': (32, 128),
2174
+ 'max_model_len-256-sw-None': (2, 32),
2175
+ 'max_model_len-8192-sw-None': (32, 128),
2176
+ 'max_model_len-512-sw-None': (4, 32),
2177
+ 'max_model_len-1024-sw-None': (8, 32),
2178
+ },
2179
+ 'q_head-128_kv_head-1_head-256': {
2180
+ 'max_model_len-128-sw-None': (1, 8),
2181
+ 'max_model_len-256-sw-None': (2, 8),
2182
+ 'max_model_len-512-sw-None': (4, 8),
2183
+ 'max_model_len-1024-sw-None': (8, 8),
2184
+ 'max_model_len-2048-sw-None': (16, 8),
2185
+ 'max_model_len-4096-sw-None': (32, 8),
2186
+ 'max_model_len-8192-sw-None': (32, 8),
2187
+ },
2188
+ 'q_head-32_kv_head-16_head-256': {
2189
+ 'max_model_len-128-sw-None': (1, 16),
2190
+ 'max_model_len-256-sw-None': (2, 16),
2191
+ 'max_model_len-512-sw-None': (4, 16),
2192
+ 'max_model_len-1024-sw-None': (4, 16),
2193
+ 'max_model_len-2048-sw-None': (4, 16),
2194
+ 'max_model_len-4096-sw-None': (4, 16),
2195
+ 'max_model_len-8192-sw-None': (4, 16),
2196
+ },
2197
+ 'q_head-64_kv_head-2_head-128': {
2198
+ 'max_model_len-128-sw-None': (1, 16),
2199
+ 'max_model_len-256-sw-None': (2, 16),
2200
+ 'max_model_len-512-sw-None': (4, 16),
2201
+ 'max_model_len-1024-sw-None': (8, 16),
2202
+ 'max_model_len-2048-sw-None': (16, 32),
2203
+ 'max_model_len-4096-sw-None': (32, 16),
2204
+ 'max_model_len-8192-sw-None': (32, 16),
2205
+ },
2206
+ 'q_head-128_kv_head-8_head-256': {
2207
+ 'max_model_len-128-sw-None': (1, 8),
2208
+ 'max_model_len-256-sw-None': (2, 8),
2209
+ 'max_model_len-512-sw-None': (4, 8),
2210
+ 'max_model_len-1024-sw-None': (8, 8),
2211
+ 'max_model_len-2048-sw-None': (8, 8),
2212
+ 'max_model_len-4096-sw-None': (8, 8),
2213
+ 'max_model_len-8192-sw-None': (8, 8),
2214
+ },
2215
+ 'q_head-32_kv_head-2_head-256': {
2216
+ 'max_model_len-128-sw-None': (1, 16),
2217
+ 'max_model_len-256-sw-None': (2, 16),
2218
+ 'max_model_len-512-sw-None': (4, 16),
2219
+ 'max_model_len-1024-sw-None': (8, 32),
2220
+ 'max_model_len-2048-sw-None': (16, 32),
2221
+ 'max_model_len-4096-sw-None': (16, 32),
2222
+ 'max_model_len-8192-sw-None': (16, 32),
2223
+ },
2224
+ 'q_head-64_kv_head-16_head-128': {
2225
+ 'max_model_len-128-sw-None': (1, 16),
2226
+ 'max_model_len-256-sw-None': (2, 16),
2227
+ 'max_model_len-512-sw-None': (4, 16),
2228
+ 'max_model_len-1024-sw-None': (8, 16),
2229
+ 'max_model_len-2048-sw-None': (8, 16),
2230
+ 'max_model_len-4096-sw-None': (8, 16),
2231
+ 'max_model_len-8192-sw-None': (8, 16),
2232
+ },
2233
+ 'q_head-16_kv_head-4_head-128': {
2234
+ 'max_model_len-128-sw-None': (1, 32),
2235
+ 'max_model_len-256-sw-None': (2, 32),
2236
+ 'max_model_len-512-sw-None': (4, 64),
2237
+ 'max_model_len-1024-sw-None': (8, 32),
2238
+ 'max_model_len-2048-sw-None': (16, 128),
2239
+ 'max_model_len-4096-sw-None': (16, 64),
2240
+ 'max_model_len-8192-sw-None': (16, 64),
2241
+ },
2242
+ 'q_head-128_kv_head-2_head-128': {
2243
+ 'max_model_len-128-sw-None': (1, 8),
2244
+ 'max_model_len-256-sw-None': (2, 8),
2245
+ 'max_model_len-512-sw-None': (4, 8),
2246
+ 'max_model_len-1024-sw-None': (8, 8),
2247
+ 'max_model_len-2048-sw-None': (16, 16),
2248
+ 'max_model_len-4096-sw-None': (32, 8),
2249
+ 'max_model_len-8192-sw-None': (16, 16),
2250
+ },
2251
+ 'q_head-64_kv_head-2_head-256': {
2252
+ 'max_model_len-128-sw-None': (1, 8),
2253
+ 'max_model_len-256-sw-None': (2, 8),
2254
+ 'max_model_len-512-sw-None': (4, 8),
2255
+ 'max_model_len-1024-sw-None': (8, 16),
2256
+ 'max_model_len-2048-sw-None': (16, 16),
2257
+ 'max_model_len-4096-sw-None': (16, 16),
2258
+ 'max_model_len-8192-sw-None': (16, 16),
2259
+ },
2260
+ 'q_head-128_kv_head-16_head-128': {
2261
+ 'max_model_len-128-sw-None': (1, 8),
2262
+ 'max_model_len-256-sw-None': (2, 8),
2263
+ 'max_model_len-512-sw-None': (4, 8),
2264
+ 'max_model_len-1024-sw-None': (8, 8),
2265
+ 'max_model_len-2048-sw-None': (8, 8),
2266
+ 'max_model_len-4096-sw-None': (8, 8),
2267
+ 'max_model_len-8192-sw-None': (8, 8),
2268
+ },
2269
+ 'q_head-32_kv_head-4_head-128': {
2270
+ 'max_model_len-128-sw-None': (1, 32),
2271
+ 'max_model_len-256-sw-None': (2, 32),
2272
+ 'max_model_len-512-sw-None': (4, 32),
2273
+ 'max_model_len-1024-sw-None': (8, 32),
2274
+ 'max_model_len-2048-sw-None': (16, 32),
2275
+ 'max_model_len-4096-sw-None': (16, 32),
2276
+ 'max_model_len-8192-sw-None': (16, 32),
2277
+ },
2278
+ 'q_head-64_kv_head-16_head-256': {
2279
+ 'max_model_len-128-sw-None': (1, 8),
2280
+ 'max_model_len-256-sw-None': (2, 8),
2281
+ 'max_model_len-512-sw-None': (4, 8),
2282
+ 'max_model_len-1024-sw-None': (4, 8),
2283
+ 'max_model_len-2048-sw-None': (4, 8),
2284
+ 'max_model_len-4096-sw-None': (4, 8),
2285
+ 'max_model_len-8192-sw-None': (4, 8),
2286
+ },
2287
+ 'q_head-16_kv_head-4_head-256': {
2288
+ 'max_model_len-128-sw-None': (1, 32),
2289
+ 'max_model_len-256-sw-None': (2, 32),
2290
+ 'max_model_len-512-sw-None': (4, 32),
2291
+ 'max_model_len-1024-sw-None': (8, 64),
2292
+ 'max_model_len-2048-sw-None': (16, 128),
2293
+ 'max_model_len-4096-sw-None': (16, 128),
2294
+ 'max_model_len-8192-sw-None': (16, 128),
2295
+ },
2296
+ 'q_head-64_kv_head-32_head-128': {
2297
+ 'max_model_len-128-sw-None': (1, 8),
2298
+ 'max_model_len-256-sw-None': (2, 8),
2299
+ 'max_model_len-512-sw-None': (4, 8),
2300
+ 'max_model_len-1024-sw-None': (4, 8),
2301
+ 'max_model_len-2048-sw-None': (4, 8),
2302
+ 'max_model_len-4096-sw-None': (4, 8),
2303
+ 'max_model_len-8192-sw-None': (4, 8),
2304
+ },
2305
+ 'q_head-128_kv_head-2_head-256': {
2306
+ 'max_model_len-128-sw-None': (1, 8),
2307
+ 'max_model_len-256-sw-None': (2, 8),
2308
+ 'max_model_len-512-sw-None': (4, 8),
2309
+ 'max_model_len-1024-sw-None': (8, 8),
2310
+ 'max_model_len-2048-sw-None': (16, 16),
2311
+ 'max_model_len-4096-sw-None': (32, 8),
2312
+ 'max_model_len-8192-sw-None': (16, 16),
2313
+ },
2314
+ 'q_head-64_kv_head-4_head-128': {
2315
+ 'max_model_len-128-sw-None': (1, 16),
2316
+ 'max_model_len-256-sw-None': (2, 16),
2317
+ 'max_model_len-512-sw-None': (4, 16),
2318
+ 'max_model_len-1024-sw-None': (8, 32),
2319
+ 'max_model_len-2048-sw-None': (16, 32),
2320
+ 'max_model_len-4096-sw-None': (16, 32),
2321
+ 'max_model_len-8192-sw-None': (16, 32),
2322
+ },
2323
+ 'q_head-16_kv_head-1_head-128': {
2324
+ 'max_model_len-128-sw-None': (1, 32),
2325
+ 'max_model_len-256-sw-None': (2, 32),
2326
+ 'max_model_len-512-sw-None': (4, 32),
2327
+ 'max_model_len-1024-sw-None': (8, 32),
2328
+ 'max_model_len-2048-sw-None': (16, 32),
2329
+ 'max_model_len-4096-sw-None': (32, 32),
2330
+ 'max_model_len-8192-sw-None': (32, 32),
2331
+ },
2332
+ 'q_head-32_kv_head-4_head-256': {
2333
+ 'max_model_len-128-sw-None': (1, 16),
2334
+ 'max_model_len-256-sw-None': (2, 16),
2335
+ 'max_model_len-512-sw-None': (4, 32),
2336
+ 'max_model_len-1024-sw-None': (8, 32),
2337
+ 'max_model_len-2048-sw-None': (16, 32),
2338
+ 'max_model_len-4096-sw-None': (16, 32),
2339
+ 'max_model_len-8192-sw-None': (16, 32),
2340
+ },
2341
+ },
2342
+ },
2343
+ 256: {
2344
+ 'q_bfloat16_kv_bfloat16': {
2345
+ 'q_head-2_kv_head-1_head-256': {
2346
+ 'max_model_len-256-sw-None': (1, 32),
2347
+ 'max_model_len-512-sw-None': (2, 64),
2348
+ 'max_model_len-1024-sw-None': (4, 128),
2349
+ 'max_model_len-2048-sw-None': (8, 64),
2350
+ 'max_model_len-4096-sw-None': (16, 128),
2351
+ 'max_model_len-8192-sw-None': (16, 128),
2352
+ },
2353
+ 'q_head-2_kv_head-1_head-128': {
2354
+ 'max_model_len-1024-sw-None': (4, 128),
2355
+ 'max_model_len-2048-sw-None': (8, 128),
2356
+ 'max_model_len-4096-sw-None': (16, 64),
2357
+ 'max_model_len-8192-sw-None': (16, 64),
2358
+ 'max_model_len-256-sw-None': (1, 128),
2359
+ 'max_model_len-512-sw-None': (2, 128),
2360
+ },
2361
+ 'q_head-8_kv_head-4_head-256': {
2362
+ 'max_model_len-512-sw-None': (2, 32),
2363
+ 'max_model_len-1024-sw-None': (4, 64),
2364
+ 'max_model_len-2048-sw-None': (8, 128),
2365
+ 'max_model_len-4096-sw-None': (8, 128),
2366
+ 'max_model_len-8192-sw-None': (4, 128),
2367
+ 'max_model_len-256-sw-None': (1, 32),
2368
+ },
2369
+ 'q_head-8_kv_head-4_head-128': {
2370
+ 'max_model_len-2048-sw-None': (8, 128),
2371
+ 'max_model_len-4096-sw-None': (8, 128),
2372
+ 'max_model_len-8192-sw-None': (8, 128),
2373
+ 'max_model_len-512-sw-None': (2, 128),
2374
+ 'max_model_len-1024-sw-None': (4, 128),
2375
+ 'max_model_len-256-sw-None': (1, 64),
2376
+ },
2377
+ 'q_head-16_kv_head-2_head-256': {
2378
+ 'max_model_len-8192-sw-None': (8, 32),
2379
+ 'max_model_len-256-sw-None': (1, 32),
2380
+ 'max_model_len-512-sw-None': (2, 32),
2381
+ 'max_model_len-1024-sw-None': (4, 32),
2382
+ 'max_model_len-2048-sw-None': (8, 32),
2383
+ 'max_model_len-4096-sw-None': (8, 32),
2384
+ },
2385
+ 'q_head-32_kv_head-16_head-256': {
2386
+ 'max_model_len-2048-sw-None': (2, 16),
2387
+ 'max_model_len-4096-sw-None': (2, 16),
2388
+ 'max_model_len-8192-sw-None': (2, 16),
2389
+ 'max_model_len-256-sw-None': (1, 16),
2390
+ 'max_model_len-512-sw-None': (2, 16),
2391
+ 'max_model_len-1024-sw-None': (2, 16),
2392
+ },
2393
+ 'q_head-32_kv_head-16_head-128': {
2394
+ 'max_model_len-4096-sw-None': (4, 32),
2395
+ 'max_model_len-8192-sw-None': (4, 32),
2396
+ 'max_model_len-2048-sw-None': (4, 32),
2397
+ 'max_model_len-256-sw-None': (1, 32),
2398
+ 'max_model_len-512-sw-None': (2, 32),
2399
+ 'max_model_len-1024-sw-None': (4, 32),
2400
+ },
2401
+ 'q_head-64_kv_head-2_head-128': {
2402
+ 'max_model_len-256-sw-None': (1, 16),
2403
+ 'max_model_len-512-sw-None': (2, 16),
2404
+ 'max_model_len-1024-sw-None': (4, 16),
2405
+ 'max_model_len-2048-sw-None': (8, 32),
2406
+ 'max_model_len-4096-sw-None': (16, 16),
2407
+ 'max_model_len-8192-sw-None': (16, 16),
2408
+ },
2409
+ 'q_head-128_kv_head-1_head-256': {
2410
+ 'max_model_len-1024-sw-None': (4, 8),
2411
+ 'max_model_len-2048-sw-None': (8, 8),
2412
+ 'max_model_len-4096-sw-None': (16, 8),
2413
+ 'max_model_len-8192-sw-None': (16, 8),
2414
+ 'max_model_len-256-sw-None': (1, 8),
2415
+ 'max_model_len-512-sw-None': (2, 8),
2416
+ },
2417
+ 'q_head-128_kv_head-8_head-256': {
2418
+ 'max_model_len-256-sw-None': (1, 8),
2419
+ 'max_model_len-512-sw-None': (2, 8),
2420
+ 'max_model_len-1024-sw-None': (4, 8),
2421
+ 'max_model_len-2048-sw-None': (4, 8),
2422
+ 'max_model_len-4096-sw-None': (4, 8),
2423
+ 'max_model_len-8192-sw-None': (4, 8),
2424
+ },
2425
+ 'q_head-32_kv_head-2_head-256': {
2426
+ 'max_model_len-256-sw-None': (1, 16),
2427
+ 'max_model_len-512-sw-None': (2, 16),
2428
+ 'max_model_len-1024-sw-None': (4, 16),
2429
+ 'max_model_len-2048-sw-None': (8, 32),
2430
+ 'max_model_len-4096-sw-None': (8, 32),
2431
+ 'max_model_len-8192-sw-None': (8, 32),
2432
+ },
2433
+ 'q_head-64_kv_head-16_head-128': {
2434
+ 'max_model_len-256-sw-None': (1, 16),
2435
+ 'max_model_len-512-sw-None': (2, 16),
2436
+ 'max_model_len-1024-sw-None': (4, 16),
2437
+ 'max_model_len-2048-sw-None': (4, 16),
2438
+ 'max_model_len-4096-sw-None': (4, 16),
2439
+ 'max_model_len-8192-sw-None': (4, 16),
2440
+ },
2441
+ 'q_head-16_kv_head-4_head-128': {
2442
+ 'max_model_len-256-sw-None': (1, 32),
2443
+ 'max_model_len-512-sw-None': (2, 32),
2444
+ 'max_model_len-1024-sw-None': (4, 32),
2445
+ 'max_model_len-2048-sw-None': (8, 128),
2446
+ 'max_model_len-4096-sw-None': (8, 64),
2447
+ 'max_model_len-8192-sw-None': (8, 128),
2448
+ },
2449
+ 'q_head-4_kv_head-1_head-128': {
2450
+ 'max_model_len-256-sw-None': (1, 128),
2451
+ 'max_model_len-512-sw-None': (2, 128),
2452
+ 'max_model_len-1024-sw-None': (4, 128),
2453
+ 'max_model_len-2048-sw-None': (8, 64),
2454
+ 'max_model_len-4096-sw-None': (16, 32),
2455
+ 'max_model_len-8192-sw-None': (16, 128),
2456
+ },
2457
+ 'q_head-128_kv_head-2_head-128': {
2458
+ 'max_model_len-256-sw-None': (1, 8),
2459
+ 'max_model_len-512-sw-None': (2, 8),
2460
+ 'max_model_len-1024-sw-None': (4, 16),
2461
+ 'max_model_len-2048-sw-None': (8, 16),
2462
+ 'max_model_len-4096-sw-None': (16, 8),
2463
+ 'max_model_len-8192-sw-None': (16, 8),
2464
+ },
2465
+ 'q_head-64_kv_head-2_head-256': {
2466
+ 'max_model_len-256-sw-None': (1, 8),
2467
+ 'max_model_len-512-sw-None': (2, 8),
2468
+ 'max_model_len-1024-sw-None': (4, 16),
2469
+ 'max_model_len-2048-sw-None': (8, 16),
2470
+ 'max_model_len-4096-sw-None': (8, 16),
2471
+ 'max_model_len-8192-sw-None': (8, 16),
2472
+ },
2473
+ 'q_head-128_kv_head-16_head-128': {
2474
+ 'max_model_len-256-sw-None': (1, 8),
2475
+ 'max_model_len-512-sw-None': (2, 8),
2476
+ 'max_model_len-1024-sw-None': (4, 8),
2477
+ 'max_model_len-2048-sw-None': (4, 8),
2478
+ 'max_model_len-4096-sw-None': (4, 8),
2479
+ 'max_model_len-8192-sw-None': (4, 8),
2480
+ },
2481
+ 'q_head-8_kv_head-2_head-256': {
2482
+ 'max_model_len-2048-sw-None': (8, 128),
2483
+ 'max_model_len-4096-sw-None': (8, 128),
2484
+ 'max_model_len-8192-sw-None': (8, 128),
2485
+ 'max_model_len-256-sw-None': (1, 32),
2486
+ 'max_model_len-512-sw-None': (2, 32),
2487
+ 'max_model_len-1024-sw-None': (4, 32),
2488
+ },
2489
+ 'q_head-16_kv_head-2_head-128': {
2490
+ 'max_model_len-8192-sw-None': (16, 32),
2491
+ 'max_model_len-256-sw-None': (1, 32),
2492
+ 'max_model_len-512-sw-None': (2, 32),
2493
+ 'max_model_len-1024-sw-None': (4, 32),
2494
+ 'max_model_len-2048-sw-None': (8, 32),
2495
+ 'max_model_len-4096-sw-None': (16, 32),
2496
+ },
2497
+ 'q_head-32_kv_head-8_head-256': {
2498
+ 'max_model_len-4096-sw-None': (4, 32),
2499
+ 'max_model_len-8192-sw-None': (4, 32),
2500
+ 'max_model_len-256-sw-None': (1, 32),
2501
+ 'max_model_len-512-sw-None': (2, 32),
2502
+ 'max_model_len-1024-sw-None': (4, 32),
2503
+ 'max_model_len-2048-sw-None': (4, 32),
2504
+ },
2505
+ 'q_head-64_kv_head-1_head-256': {
2506
+ 'max_model_len-256-sw-None': (1, 8),
2507
+ 'max_model_len-512-sw-None': (2, 8),
2508
+ 'max_model_len-1024-sw-None': (4, 16),
2509
+ 'max_model_len-2048-sw-None': (8, 16),
2510
+ 'max_model_len-4096-sw-None': (16, 16),
2511
+ 'max_model_len-8192-sw-None': (16, 16),
2512
+ },
2513
+ 'q_head-128_kv_head-1_head-128': {
2514
+ 'max_model_len-1024-sw-None': (4, 8),
2515
+ 'max_model_len-2048-sw-None': (8, 8),
2516
+ 'max_model_len-4096-sw-None': (16, 16),
2517
+ 'max_model_len-256-sw-None': (1, 8),
2518
+ 'max_model_len-8192-sw-None': (16, 16),
2519
+ 'max_model_len-512-sw-None': (2, 8),
2520
+ },
2521
+ 'q_head-128_kv_head-8_head-128': {
2522
+ 'max_model_len-256-sw-None': (1, 8),
2523
+ 'max_model_len-512-sw-None': (2, 16),
2524
+ 'max_model_len-1024-sw-None': (4, 16),
2525
+ 'max_model_len-2048-sw-None': (8, 16),
2526
+ 'max_model_len-4096-sw-None': (8, 16),
2527
+ 'max_model_len-8192-sw-None': (8, 16),
2528
+ },
2529
+ 'q_head-8_kv_head-1_head-128': {
2530
+ 'max_model_len-256-sw-None': (1, 64),
2531
+ 'max_model_len-512-sw-None': (2, 32),
2532
+ 'max_model_len-1024-sw-None': (4, 32),
2533
+ 'max_model_len-2048-sw-None': (8, 32),
2534
+ 'max_model_len-4096-sw-None': (16, 32),
2535
+ 'max_model_len-8192-sw-None': (16, 32),
2536
+ },
2537
+ 'q_head-32_kv_head-4_head-128': {
2538
+ 'max_model_len-256-sw-None': (1, 32),
2539
+ 'max_model_len-512-sw-None': (2, 32),
2540
+ 'max_model_len-1024-sw-None': (4, 32),
2541
+ 'max_model_len-2048-sw-None': (8, 32),
2542
+ 'max_model_len-4096-sw-None': (8, 32),
2543
+ 'max_model_len-8192-sw-None': (8, 32),
2544
+ },
2545
+ 'q_head-64_kv_head-16_head-256': {
2546
+ 'max_model_len-256-sw-None': (1, 8),
2547
+ 'max_model_len-512-sw-None': (2, 8),
2548
+ 'max_model_len-1024-sw-None': (2, 8),
2549
+ 'max_model_len-2048-sw-None': (2, 8),
2550
+ 'max_model_len-4096-sw-None': (2, 8),
2551
+ 'max_model_len-8192-sw-None': (2, 8),
2552
+ },
2553
+ 'q_head-16_kv_head-4_head-256': {
2554
+ 'max_model_len-256-sw-None': (1, 32),
2555
+ 'max_model_len-512-sw-None': (2, 32),
2556
+ 'max_model_len-1024-sw-None': (4, 64),
2557
+ 'max_model_len-2048-sw-None': (8, 128),
2558
+ 'max_model_len-4096-sw-None': (8, 128),
2559
+ 'max_model_len-8192-sw-None': (8, 128),
2560
+ },
2561
+ 'q_head-4_kv_head-1_head-256': {
2562
+ 'max_model_len-256-sw-None': (1, 32),
2563
+ 'max_model_len-512-sw-None': (2, 64),
2564
+ 'max_model_len-1024-sw-None': (4, 32),
2565
+ 'max_model_len-2048-sw-None': (8, 64),
2566
+ 'max_model_len-4096-sw-None': (16, 128),
2567
+ 'max_model_len-8192-sw-None': (16, 128),
2568
+ },
2569
+ 'q_head-64_kv_head-32_head-128': {
2570
+ 'max_model_len-256-sw-None': (1, 8),
2571
+ 'max_model_len-512-sw-None': (2, 8),
2572
+ 'max_model_len-1024-sw-None': (2, 8),
2573
+ 'max_model_len-2048-sw-None': (2, 8),
2574
+ 'max_model_len-4096-sw-None': (2, 8),
2575
+ 'max_model_len-8192-sw-None': (2, 8),
2576
+ },
2577
+ 'q_head-16_kv_head-8_head-128': {
2578
+ 'max_model_len-256-sw-None': (1, 64),
2579
+ 'max_model_len-512-sw-None': (2, 64),
2580
+ 'max_model_len-1024-sw-None': (4, 128),
2581
+ 'max_model_len-2048-sw-None': (8, 128),
2582
+ 'max_model_len-4096-sw-None': (8, 128),
2583
+ 'max_model_len-8192-sw-None': (8, 128),
2584
+ },
2585
+ 'q_head-128_kv_head-2_head-256': {
2586
+ 'max_model_len-256-sw-None': (1, 8),
2587
+ 'max_model_len-512-sw-None': (2, 8),
2588
+ 'max_model_len-1024-sw-None': (4, 8),
2589
+ 'max_model_len-2048-sw-None': (8, 16),
2590
+ 'max_model_len-4096-sw-None': (16, 8),
2591
+ 'max_model_len-8192-sw-None': (8, 16),
2592
+ },
2593
+ 'q_head-4_kv_head-2_head-128': {
2594
+ 'max_model_len-256-sw-None': (1, 128),
2595
+ 'max_model_len-512-sw-None': (2, 128),
2596
+ 'max_model_len-1024-sw-None': (4, 64),
2597
+ 'max_model_len-2048-sw-None': (8, 64),
2598
+ 'max_model_len-4096-sw-None': (16, 128),
2599
+ 'max_model_len-8192-sw-None': (16, 128),
2600
+ },
2601
+ 'q_head-64_kv_head-4_head-128': {
2602
+ 'max_model_len-256-sw-None': (1, 16),
2603
+ 'max_model_len-512-sw-None': (2, 16),
2604
+ 'max_model_len-1024-sw-None': (4, 32),
2605
+ 'max_model_len-2048-sw-None': (8, 32),
2606
+ 'max_model_len-4096-sw-None': (8, 32),
2607
+ 'max_model_len-8192-sw-None': (8, 32),
2608
+ },
2609
+ 'q_head-16_kv_head-1_head-128': {
2610
+ 'max_model_len-256-sw-None': (1, 32),
2611
+ 'max_model_len-512-sw-None': (2, 32),
2612
+ 'max_model_len-1024-sw-None': (4, 32),
2613
+ 'max_model_len-2048-sw-None': (8, 32),
2614
+ 'max_model_len-4096-sw-None': (16, 32),
2615
+ 'max_model_len-8192-sw-None': (16, 32),
2616
+ },
2617
+ 'q_head-32_kv_head-4_head-256': {
2618
+ 'max_model_len-256-sw-None': (1, 16),
2619
+ 'max_model_len-512-sw-None': (2, 32),
2620
+ 'max_model_len-1024-sw-None': (4, 32),
2621
+ 'max_model_len-2048-sw-None': (8, 64),
2622
+ 'max_model_len-4096-sw-None': (8, 32),
2623
+ 'max_model_len-8192-sw-None': (8, 32),
2624
+ },
2625
+ 'q_head-8_kv_head-1_head-256': {
2626
+ 'max_model_len-256-sw-None': (1, 32),
2627
+ 'max_model_len-512-sw-None': (2, 32),
2628
+ 'max_model_len-1024-sw-None': (4, 32),
2629
+ 'max_model_len-2048-sw-None': (8, 32),
2630
+ 'max_model_len-4096-sw-None': (16, 128),
2631
+ 'max_model_len-8192-sw-None': (16, 128),
2632
+ },
2633
+ 'q_head-16_kv_head-8_head-256': {
2634
+ 'max_model_len-256-sw-None': (1, 32),
2635
+ 'max_model_len-512-sw-None': (2, 32),
2636
+ 'max_model_len-1024-sw-None': (4, 64),
2637
+ 'max_model_len-2048-sw-None': (4, 64),
2638
+ 'max_model_len-4096-sw-None': (4, 64),
2639
+ 'max_model_len-8192-sw-None': (4, 64),
2640
+ },
2641
+ 'q_head-4_kv_head-2_head-256': {
2642
+ 'max_model_len-256-sw-None': (1, 32),
2643
+ 'max_model_len-512-sw-None': (2, 64),
2644
+ 'max_model_len-1024-sw-None': (4, 64),
2645
+ 'max_model_len-2048-sw-None': (8, 128),
2646
+ 'max_model_len-4096-sw-None': (8, 128),
2647
+ 'max_model_len-8192-sw-None': (8, 128),
2648
+ },
2649
+ 'q_head-128_kv_head-4_head-128': {
2650
+ 'max_model_len-256-sw-None': (1, 8),
2651
+ 'max_model_len-512-sw-None': (2, 16),
2652
+ 'max_model_len-1024-sw-None': (4, 16),
2653
+ 'max_model_len-2048-sw-None': (8, 16),
2654
+ 'max_model_len-4096-sw-None': (8, 16),
2655
+ 'max_model_len-8192-sw-None': (8, 16),
2656
+ },
2657
+ 'q_head-32_kv_head-1_head-128': {
2658
+ 'max_model_len-256-sw-None': (1, 32),
2659
+ 'max_model_len-512-sw-None': (2, 32),
2660
+ 'max_model_len-1024-sw-None': (4, 32),
2661
+ 'max_model_len-2048-sw-None': (8, 32),
2662
+ 'max_model_len-4096-sw-None': (16, 32),
2663
+ 'max_model_len-8192-sw-None': (16, 32),
2664
+ },
2665
+ 'q_head-64_kv_head-4_head-256': {
2666
+ 'max_model_len-256-sw-None': (1, 16),
2667
+ 'max_model_len-512-sw-None': (2, 16),
2668
+ 'max_model_len-1024-sw-None': (4, 16),
2669
+ 'max_model_len-2048-sw-None': (8, 16),
2670
+ 'max_model_len-4096-sw-None': (4, 32),
2671
+ 'max_model_len-8192-sw-None': (4, 32),
2672
+ },
2673
+ 'q_head-16_kv_head-1_head-256': {
2674
+ 'max_model_len-256-sw-None': (1, 32),
2675
+ 'max_model_len-512-sw-None': (2, 32),
2676
+ 'max_model_len-1024-sw-None': (4, 32),
2677
+ 'max_model_len-2048-sw-None': (8, 32),
2678
+ 'max_model_len-4096-sw-None': (16, 32),
2679
+ 'max_model_len-8192-sw-None': (16, 32),
2680
+ },
2681
+ 'q_head-32_kv_head-8_head-128': {
2682
+ 'max_model_len-256-sw-None': (1, 32),
2683
+ 'max_model_len-512-sw-None': (2, 32),
2684
+ 'max_model_len-1024-sw-None': (4, 32),
2685
+ 'max_model_len-2048-sw-None': (8, 64),
2686
+ 'max_model_len-4096-sw-None': (8, 64),
2687
+ 'max_model_len-8192-sw-None': (8, 64),
2688
+ },
2689
+ 'q_head-8_kv_head-2_head-128': {
2690
+ 'max_model_len-256-sw-None': (1, 32),
2691
+ 'max_model_len-512-sw-None': (2, 32),
2692
+ 'max_model_len-1024-sw-None': (4, 128),
2693
+ 'max_model_len-2048-sw-None': (8, 32),
2694
+ 'max_model_len-4096-sw-None': (16, 128),
2695
+ 'max_model_len-8192-sw-None': (16, 128),
2696
+ },
2697
+ 'q_head-32_kv_head-2_head-128': {
2698
+ 'max_model_len-256-sw-None': (1, 32),
2699
+ 'max_model_len-512-sw-None': (2, 32),
2700
+ 'max_model_len-1024-sw-None': (4, 32),
2701
+ 'max_model_len-2048-sw-None': (8, 32),
2702
+ 'max_model_len-4096-sw-None': (16, 32),
2703
+ 'max_model_len-8192-sw-None': (16, 32),
2704
+ },
2705
+ 'q_head-64_kv_head-1_head-128': {
2706
+ 'max_model_len-256-sw-None': (1, 16),
2707
+ 'max_model_len-512-sw-None': (2, 16),
2708
+ 'max_model_len-1024-sw-None': (4, 16),
2709
+ 'max_model_len-2048-sw-None': (8, 16),
2710
+ 'max_model_len-4096-sw-None': (16, 16),
2711
+ 'max_model_len-8192-sw-None': (16, 16),
2712
+ },
2713
+ 'q_head-128_kv_head-4_head-256': {
2714
+ 'max_model_len-256-sw-None': (1, 8),
2715
+ 'max_model_len-512-sw-None': (2, 8),
2716
+ 'max_model_len-1024-sw-None': (4, 16),
2717
+ 'max_model_len-2048-sw-None': (8, 16),
2718
+ 'max_model_len-4096-sw-None': (8, 16),
2719
+ 'max_model_len-8192-sw-None': (8, 16),
2720
+ },
2721
+ 'q_head-32_kv_head-1_head-256': {
2722
+ 'max_model_len-256-sw-None': (1, 16),
2723
+ 'max_model_len-512-sw-None': (2, 16),
2724
+ 'max_model_len-1024-sw-None': (4, 16),
2725
+ 'max_model_len-2048-sw-None': (8, 16),
2726
+ 'max_model_len-4096-sw-None': (16, 32),
2727
+ 'max_model_len-8192-sw-None': (16, 32),
2728
+ },
2729
+ 'q_head-64_kv_head-8_head-128': {
2730
+ 'max_model_len-256-sw-None': (1, 16),
2731
+ 'max_model_len-512-sw-None': (2, 32),
2732
+ 'max_model_len-1024-sw-None': (4, 32),
2733
+ 'max_model_len-2048-sw-None': (8, 32),
2734
+ 'max_model_len-4096-sw-None': (8, 32),
2735
+ 'max_model_len-8192-sw-None': (8, 32),
2736
+ },
2737
+ 'q_head-64_kv_head-8_head-256': {
2738
+ 'max_model_len-256-sw-None': (1, 16),
2739
+ 'max_model_len-512-sw-None': (2, 16),
2740
+ 'max_model_len-1024-sw-None': (4, 16),
2741
+ 'max_model_len-2048-sw-None': (4, 16),
2742
+ 'max_model_len-4096-sw-None': (4, 16),
2743
+ 'max_model_len-8192-sw-None': (4, 16),
2744
+ },
2745
+ },
2746
+ 'q_bfloat16_kv_float8_e4m3fn': {
2747
+ 'q_head-2_kv_head-2_head-128': {
2748
+ 'max_model_len-4096-sw-None': (16, 128),
2749
+ 'max_model_len-8192-sw-None': (16, 128),
2750
+ 'max_model_len-256-sw-None': (1, 64),
2751
+ 'max_model_len-512-sw-None': (2, 64),
2752
+ 'max_model_len-1024-sw-None': (4, 128),
2753
+ 'max_model_len-2048-sw-None': (8, 128),
2754
+ },
2755
+ 'q_head-8_kv_head-4_head-128': {
2756
+ 'max_model_len-256-sw-None': (1, 64),
2757
+ 'max_model_len-512-sw-None': (2, 64),
2758
+ 'max_model_len-1024-sw-None': (4, 64),
2759
+ 'max_model_len-2048-sw-None': (8, 128),
2760
+ 'max_model_len-4096-sw-None': (16, 128),
2761
+ 'max_model_len-8192-sw-None': (16, 128),
2762
+ },
2763
+ 'q_head-16_kv_head-2_head-256': {
2764
+ 'max_model_len-1024-sw-None': (4, 32),
2765
+ 'max_model_len-2048-sw-None': (8, 32),
2766
+ 'max_model_len-4096-sw-None': (16, 32),
2767
+ 'max_model_len-8192-sw-None': (16, 32),
2768
+ 'max_model_len-256-sw-None': (1, 16),
2769
+ 'max_model_len-512-sw-None': (2, 32),
2770
+ },
2771
+ 'q_head-32_kv_head-2_head-128': {
2772
+ 'max_model_len-4096-sw-None': (16, 32),
2773
+ 'max_model_len-8192-sw-None': (16, 32),
2774
+ 'max_model_len-256-sw-None': (1, 32),
2775
+ 'max_model_len-512-sw-None': (2, 32),
2776
+ 'max_model_len-1024-sw-None': (4, 32),
2777
+ 'max_model_len-2048-sw-None': (8, 32),
2778
+ },
2779
+ 'q_head-32_kv_head-16_head-128': {
2780
+ 'max_model_len-512-sw-None': (2, 32),
2781
+ 'max_model_len-1024-sw-None': (4, 32),
2782
+ 'max_model_len-2048-sw-None': (8, 32),
2783
+ 'max_model_len-4096-sw-None': (8, 32),
2784
+ 'max_model_len-8192-sw-None': (8, 32),
2785
+ 'max_model_len-256-sw-None': (1, 32),
2786
+ },
2787
+ 'q_head-64_kv_head-8_head-256': {
2788
+ 'max_model_len-2048-sw-None': (8, 16),
2789
+ 'max_model_len-4096-sw-None': (8, 16),
2790
+ 'max_model_len-8192-sw-None': (8, 16),
2791
+ 'max_model_len-256-sw-None': (1, 16),
2792
+ 'max_model_len-512-sw-None': (2, 16),
2793
+ 'max_model_len-1024-sw-None': (4, 16),
2794
+ },
2795
+ 'q_head-128_kv_head-2_head-256': {
2796
+ 'max_model_len-512-sw-None': (2, 8),
2797
+ 'max_model_len-1024-sw-None': (4, 8),
2798
+ 'max_model_len-2048-sw-None': (8, 8),
2799
+ 'max_model_len-4096-sw-None': (16, 8),
2800
+ 'max_model_len-8192-sw-None': (16, 8),
2801
+ 'max_model_len-256-sw-None': (1, 8),
2802
+ },
2803
+ 'q_head-128_kv_head-8_head-128': {
2804
+ 'max_model_len-4096-sw-None': (16, 8),
2805
+ 'max_model_len-8192-sw-None': (8, 16),
2806
+ 'max_model_len-256-sw-None': (1, 8),
2807
+ 'max_model_len-512-sw-None': (2, 16),
2808
+ 'max_model_len-1024-sw-None': (4, 16),
2809
+ 'max_model_len-2048-sw-None': (8, 16),
2810
+ },
2811
+ 'q_head-32_kv_head-16_head-256': {
2812
+ 'max_model_len-256-sw-None': (1, 16),
2813
+ 'max_model_len-512-sw-None': (2, 16),
2814
+ 'max_model_len-1024-sw-None': (4, 16),
2815
+ 'max_model_len-2048-sw-None': (4, 16),
2816
+ 'max_model_len-4096-sw-None': (4, 16),
2817
+ 'max_model_len-8192-sw-None': (4, 16),
2818
+ },
2819
+ 'q_head-64_kv_head-2_head-128': {
2820
+ 'max_model_len-256-sw-None': (1, 16),
2821
+ 'max_model_len-512-sw-None': (2, 16),
2822
+ 'max_model_len-1024-sw-None': (4, 16),
2823
+ 'max_model_len-2048-sw-None': (8, 16),
2824
+ 'max_model_len-4096-sw-None': (16, 16),
2825
+ 'max_model_len-8192-sw-None': (16, 16),
2826
+ },
2827
+ 'q_head-8_kv_head-4_head-256': {
2828
+ 'max_model_len-256-sw-None': (1, 32),
2829
+ 'max_model_len-512-sw-None': (2, 32),
2830
+ 'max_model_len-1024-sw-None': (4, 64),
2831
+ 'max_model_len-2048-sw-None': (8, 64),
2832
+ 'max_model_len-4096-sw-None': (16, 128),
2833
+ 'max_model_len-8192-sw-None': (16, 128),
2834
+ },
2835
+ 'q_head-2_kv_head-2_head-256': {
2836
+ 'max_model_len-256-sw-None': (1, 32),
2837
+ 'max_model_len-512-sw-None': (2, 32),
2838
+ 'max_model_len-1024-sw-None': (4, 32),
2839
+ 'max_model_len-2048-sw-None': (8, 128),
2840
+ 'max_model_len-4096-sw-None': (16, 128),
2841
+ 'max_model_len-8192-sw-None': (16, 64),
2842
+ },
2843
+ 'q_head-128_kv_head-8_head-256': {
2844
+ 'max_model_len-256-sw-None': (1, 8),
2845
+ 'max_model_len-512-sw-None': (2, 8),
2846
+ 'max_model_len-1024-sw-None': (4, 8),
2847
+ 'max_model_len-2048-sw-None': (8, 8),
2848
+ 'max_model_len-4096-sw-None': (8, 8),
2849
+ 'max_model_len-8192-sw-None': (8, 8),
2850
+ },
2851
+ 'q_head-32_kv_head-2_head-256': {
2852
+ 'max_model_len-256-sw-None': (1, 16),
2853
+ 'max_model_len-512-sw-None': (2, 16),
2854
+ 'max_model_len-1024-sw-None': (4, 16),
2855
+ 'max_model_len-2048-sw-None': (8, 16),
2856
+ 'max_model_len-4096-sw-None': (16, 32),
2857
+ 'max_model_len-8192-sw-None': (16, 32),
2858
+ },
2859
+ 'q_head-64_kv_head-16_head-128': {
2860
+ 'max_model_len-256-sw-None': (1, 16),
2861
+ 'max_model_len-512-sw-None': (2, 16),
2862
+ 'max_model_len-1024-sw-None': (4, 16),
2863
+ 'max_model_len-2048-sw-None': (8, 16),
2864
+ 'max_model_len-4096-sw-None': (8, 16),
2865
+ 'max_model_len-8192-sw-None': (8, 16),
2866
+ },
2867
+ 'q_head-4_kv_head-2_head-128': {
2868
+ 'max_model_len-256-sw-None': (1, 64),
2869
+ 'max_model_len-512-sw-None': (2, 128),
2870
+ 'max_model_len-1024-sw-None': (4, 128),
2871
+ 'max_model_len-2048-sw-None': (8, 64),
2872
+ 'max_model_len-4096-sw-None': (16, 128),
2873
+ 'max_model_len-8192-sw-None': (16, 128),
2874
+ },
2875
+ 'q_head-16_kv_head-4_head-128': {
2876
+ 'max_model_len-256-sw-None': (1, 32),
2877
+ 'max_model_len-512-sw-None': (2, 32),
2878
+ 'max_model_len-1024-sw-None': (4, 32),
2879
+ 'max_model_len-2048-sw-None': (8, 32),
2880
+ 'max_model_len-4096-sw-None': (16, 32),
2881
+ 'max_model_len-8192-sw-None': (16, 32),
2882
+ },
2883
+ 'q_head-128_kv_head-2_head-128': {
2884
+ 'max_model_len-256-sw-None': (1, 8),
2885
+ 'max_model_len-512-sw-None': (2, 8),
2886
+ 'max_model_len-1024-sw-None': (4, 8),
2887
+ 'max_model_len-2048-sw-None': (8, 8),
2888
+ 'max_model_len-4096-sw-None': (16, 8),
2889
+ 'max_model_len-8192-sw-None': (16, 8),
2890
+ },
2891
+ 'q_head-8_kv_head-2_head-256': {
2892
+ 'max_model_len-256-sw-None': (1, 32),
2893
+ 'max_model_len-512-sw-None': (2, 32),
2894
+ 'max_model_len-1024-sw-None': (4, 32),
2895
+ 'max_model_len-2048-sw-None': (8, 32),
2896
+ 'max_model_len-4096-sw-None': (16, 32),
2897
+ 'max_model_len-8192-sw-None': (16, 32),
2898
+ },
2899
+ 'q_head-16_kv_head-2_head-128': {
2900
+ 'max_model_len-1024-sw-None': (4, 32),
2901
+ 'max_model_len-2048-sw-None': (8, 32),
2902
+ 'max_model_len-256-sw-None': (1, 32),
2903
+ 'max_model_len-512-sw-None': (2, 32),
2904
+ 'max_model_len-4096-sw-None': (16, 32),
2905
+ 'max_model_len-8192-sw-None': (16, 32),
2906
+ },
2907
+ 'q_head-32_kv_head-8_head-256': {
2908
+ 'max_model_len-512-sw-None': (2, 32),
2909
+ 'max_model_len-1024-sw-None': (4, 32),
2910
+ 'max_model_len-2048-sw-None': (8, 32),
2911
+ 'max_model_len-4096-sw-None': (8, 32),
2912
+ 'max_model_len-256-sw-None': (1, 16),
2913
+ 'max_model_len-8192-sw-None': (8, 32),
2914
+ },
2915
+ 'q_head-64_kv_head-8_head-128': {
2916
+ 'max_model_len-2048-sw-None': (8, 32),
2917
+ 'max_model_len-4096-sw-None': (16, 16),
2918
+ 'max_model_len-8192-sw-None': (16, 16),
2919
+ 'max_model_len-256-sw-None': (1, 16),
2920
+ 'max_model_len-512-sw-None': (2, 16),
2921
+ 'max_model_len-1024-sw-None': (4, 16),
2922
+ },
2923
+ 'q_head-128_kv_head-4_head-256': {
2924
+ 'max_model_len-4096-sw-None': (8, 16),
2925
+ 'max_model_len-8192-sw-None': (8, 16),
2926
+ 'max_model_len-256-sw-None': (1, 8),
2927
+ 'max_model_len-512-sw-None': (2, 8),
2928
+ 'max_model_len-1024-sw-None': (4, 8),
2929
+ 'max_model_len-2048-sw-None': (8, 16),
2930
+ },
2931
+ 'q_head-64_kv_head-2_head-256': {
2932
+ 'max_model_len-256-sw-None': (1, 8),
2933
+ 'max_model_len-512-sw-None': (2, 8),
2934
+ 'max_model_len-1024-sw-None': (4, 8),
2935
+ 'max_model_len-2048-sw-None': (8, 16),
2936
+ 'max_model_len-4096-sw-None': (16, 8),
2937
+ 'max_model_len-8192-sw-None': (16, 8),
2938
+ },
2939
+ 'q_head-32_kv_head-4_head-128': {
2940
+ 'max_model_len-256-sw-None': (1, 32),
2941
+ 'max_model_len-512-sw-None': (2, 32),
2942
+ 'max_model_len-1024-sw-None': (4, 32),
2943
+ 'max_model_len-2048-sw-None': (8, 32),
2944
+ 'max_model_len-4096-sw-None': (16, 32),
2945
+ 'max_model_len-8192-sw-None': (16, 32),
2946
+ },
2947
+ 'q_head-128_kv_head-16_head-128': {
2948
+ 'max_model_len-256-sw-None': (1, 8),
2949
+ 'max_model_len-512-sw-None': (2, 8),
2950
+ 'max_model_len-1024-sw-None': (4, 8),
2951
+ 'max_model_len-2048-sw-None': (8, 8),
2952
+ 'max_model_len-4096-sw-None': (8, 8),
2953
+ 'max_model_len-8192-sw-None': (8, 8),
2954
+ },
2955
+ 'q_head-8_kv_head-2_head-128': {
2956
+ 'max_model_len-256-sw-None': (1, 64),
2957
+ 'max_model_len-512-sw-None': (2, 64),
2958
+ 'max_model_len-1024-sw-None': (4, 32),
2959
+ 'max_model_len-2048-sw-None': (8, 32),
2960
+ 'max_model_len-4096-sw-None': (16, 32),
2961
+ 'max_model_len-8192-sw-None': (16, 32),
2962
+ },
2963
+ 'q_head-64_kv_head-16_head-256': {
2964
+ 'max_model_len-256-sw-None': (1, 8),
2965
+ 'max_model_len-512-sw-None': (2, 8),
2966
+ 'max_model_len-1024-sw-None': (4, 8),
2967
+ 'max_model_len-2048-sw-None': (4, 8),
2968
+ 'max_model_len-4096-sw-None': (4, 8),
2969
+ 'max_model_len-8192-sw-None': (4, 8),
2970
+ },
2971
+ 'q_head-16_kv_head-4_head-256': {
2972
+ 'max_model_len-256-sw-None': (1, 32),
2973
+ 'max_model_len-512-sw-None': (2, 32),
2974
+ 'max_model_len-1024-sw-None': (4, 32),
2975
+ 'max_model_len-2048-sw-None': (8, 32),
2976
+ 'max_model_len-4096-sw-None': (8, 32),
2977
+ 'max_model_len-8192-sw-None': (16, 64),
2978
+ },
2979
+ 'q_head-64_kv_head-32_head-128': {
2980
+ 'max_model_len-256-sw-None': (1, 8),
2981
+ 'max_model_len-512-sw-None': (2, 8),
2982
+ 'max_model_len-1024-sw-None': (4, 8),
2983
+ 'max_model_len-2048-sw-None': (4, 8),
2984
+ 'max_model_len-4096-sw-None': (4, 8),
2985
+ 'max_model_len-8192-sw-None': (4, 8),
2986
+ },
2987
+ 'q_head-4_kv_head-2_head-256': {
2988
+ 'max_model_len-256-sw-None': (1, 32),
2989
+ 'max_model_len-512-sw-None': (2, 32),
2990
+ 'max_model_len-1024-sw-None': (4, 32),
2991
+ 'max_model_len-2048-sw-None': (8, 32),
2992
+ 'max_model_len-4096-sw-None': (16, 64),
2993
+ 'max_model_len-8192-sw-None': (16, 128),
2994
+ },
2995
+ 'q_head-16_kv_head-8_head-128': {
2996
+ 'max_model_len-256-sw-None': (1, 32),
2997
+ 'max_model_len-512-sw-None': (2, 32),
2998
+ 'max_model_len-1024-sw-None': (4, 32),
2999
+ 'max_model_len-2048-sw-None': (8, 32),
3000
+ 'max_model_len-4096-sw-None': (16, 64),
3001
+ 'max_model_len-8192-sw-None': (16, 64),
3002
+ },
3003
+ 'q_head-64_kv_head-4_head-128': {
3004
+ 'max_model_len-256-sw-None': (1, 16),
3005
+ 'max_model_len-512-sw-None': (2, 16),
3006
+ 'max_model_len-1024-sw-None': (4, 16),
3007
+ 'max_model_len-2048-sw-None': (8, 16),
3008
+ 'max_model_len-4096-sw-None': (16, 16),
3009
+ 'max_model_len-8192-sw-None': (16, 16),
3010
+ },
3011
+ 'q_head-32_kv_head-4_head-256': {
3012
+ 'max_model_len-256-sw-None': (1, 16),
3013
+ 'max_model_len-512-sw-None': (2, 16),
3014
+ 'max_model_len-1024-sw-None': (4, 32),
3015
+ 'max_model_len-2048-sw-None': (8, 32),
3016
+ 'max_model_len-4096-sw-None': (16, 16),
3017
+ 'max_model_len-8192-sw-None': (8, 32),
3018
+ },
3019
+ 'q_head-16_kv_head-8_head-256': {
3020
+ 'max_model_len-256-sw-None': (1, 32),
3021
+ 'max_model_len-512-sw-None': (2, 32),
3022
+ 'max_model_len-1024-sw-None': (4, 64),
3023
+ 'max_model_len-2048-sw-None': (8, 64),
3024
+ 'max_model_len-4096-sw-None': (8, 64),
3025
+ 'max_model_len-8192-sw-None': (8, 64),
3026
+ },
3027
+ 'q_head-128_kv_head-4_head-128': {
3028
+ 'max_model_len-256-sw-None': (1, 8),
3029
+ 'max_model_len-512-sw-None': (2, 8),
3030
+ 'max_model_len-1024-sw-None': (4, 8),
3031
+ 'max_model_len-2048-sw-None': (8, 16),
3032
+ 'max_model_len-4096-sw-None': (16, 8),
3033
+ 'max_model_len-8192-sw-None': (16, 8),
3034
+ },
3035
+ 'q_head-64_kv_head-4_head-256': {
3036
+ 'max_model_len-256-sw-None': (1, 8),
3037
+ 'max_model_len-512-sw-None': (2, 8),
3038
+ 'max_model_len-1024-sw-None': (4, 16),
3039
+ 'max_model_len-2048-sw-None': (8, 16),
3040
+ 'max_model_len-4096-sw-None': (8, 16),
3041
+ 'max_model_len-8192-sw-None': (8, 16),
3042
+ },
3043
+ 'q_head-32_kv_head-8_head-128': {
3044
+ 'max_model_len-256-sw-None': (1, 32),
3045
+ 'max_model_len-512-sw-None': (2, 32),
3046
+ 'max_model_len-1024-sw-None': (4, 32),
3047
+ 'max_model_len-2048-sw-None': (8, 32),
3048
+ 'max_model_len-4096-sw-None': (16, 32),
3049
+ 'max_model_len-8192-sw-None': (16, 32),
3050
+ },
3051
+ },
3052
+ },
3053
+ },
3054
+ 'TPU v5e': {
3055
+ 128: {
3056
+ 'q_bfloat16_kv_bfloat16': {
3057
+ 'q_head-128_kv_head-1_head-128': {
3058
+ 'max_model_len-1024-sw-None': (4, 32),
3059
+ 'max_model_len-128-sw-None': (1, 8),
3060
+ 'max_model_len-2048-sw-None': (16, 8),
3061
+ 'max_model_len-256-sw-None': (2, 8),
3062
+ 'max_model_len-4096-sw-None': (16, 16),
3063
+ 'max_model_len-512-sw-None': (4, 8),
3064
+ 'max_model_len-8192-sw-None': (16, 16),
3065
+ },
3066
+ 'q_head-128_kv_head-1_head-256': {
3067
+ 'max_model_len-1024-sw-None': (8, 16),
3068
+ 'max_model_len-128-sw-None': (1, 8),
3069
+ 'max_model_len-2048-sw-None': (16, 8),
3070
+ 'max_model_len-256-sw-None': (2, 8),
3071
+ 'max_model_len-4096-sw-None': (16, 8),
3072
+ 'max_model_len-512-sw-None': (2, 8),
3073
+ 'max_model_len-8192-sw-None': (16, 8),
3074
+ },
3075
+ 'q_head-128_kv_head-16_head-128': {
3076
+ 'max_model_len-1024-sw-None': (8, 16),
3077
+ 'max_model_len-128-sw-None': (1, 16),
3078
+ 'max_model_len-2048-sw-None': (8, 16),
3079
+ 'max_model_len-256-sw-None': (2, 8),
3080
+ 'max_model_len-4096-sw-None': (8, 16),
3081
+ 'max_model_len-512-sw-None': (2, 16),
3082
+ 'max_model_len-8192-sw-None': (8, 16),
3083
+ },
3084
+ 'q_head-128_kv_head-16_head-256': {
3085
+ 'max_model_len-1024-sw-None': (4, 8),
3086
+ 'max_model_len-128-sw-None': (1, 8),
3087
+ 'max_model_len-2048-sw-None': (4, 8),
3088
+ 'max_model_len-256-sw-None': (2, 8),
3089
+ 'max_model_len-4096-sw-None': (4, 8),
3090
+ 'max_model_len-512-sw-None': (4, 8),
3091
+ 'max_model_len-8192-sw-None': (4, 8),
3092
+ },
3093
+ 'q_head-128_kv_head-2_head-128': {
3094
+ 'max_model_len-1024-sw-None': (8, 8),
3095
+ 'max_model_len-128-sw-None': (1, 8),
3096
+ 'max_model_len-2048-sw-None': (16, 8),
3097
+ 'max_model_len-256-sw-None': (2, 16),
3098
+ 'max_model_len-4096-sw-None': (8, 16),
3099
+ 'max_model_len-512-sw-None': (4, 16),
3100
+ 'max_model_len-8192-sw-None': (16, 16),
3101
+ },
3102
+ 'q_head-128_kv_head-2_head-256': {
3103
+ 'max_model_len-1024-sw-None': (8, 8),
3104
+ 'max_model_len-128-sw-None': (1, 8),
3105
+ 'max_model_len-2048-sw-None': (16, 8),
3106
+ 'max_model_len-256-sw-None': (2, 8),
3107
+ 'max_model_len-4096-sw-None': (8, 16),
3108
+ 'max_model_len-512-sw-None': (4, 8),
3109
+ 'max_model_len-8192-sw-None': (8, 8),
3110
+ },
3111
+ 'q_head-128_kv_head-4_head-128': {
3112
+ 'max_model_len-1024-sw-None': (8, 8),
3113
+ 'max_model_len-128-sw-None': (1, 16),
3114
+ 'max_model_len-2048-sw-None': (8, 8),
3115
+ 'max_model_len-256-sw-None': (2, 8),
3116
+ 'max_model_len-4096-sw-None': (8, 32),
3117
+ 'max_model_len-512-sw-None': (4, 8),
3118
+ 'max_model_len-8192-sw-None': (8, 16),
3119
+ },
3120
+ 'q_head-128_kv_head-4_head-256': {
3121
+ 'max_model_len-1024-sw-None': (4, 8),
3122
+ 'max_model_len-128-sw-None': (1, 8),
3123
+ 'max_model_len-2048-sw-None': (8, 16),
3124
+ 'max_model_len-256-sw-None': (2, 8),
3125
+ 'max_model_len-4096-sw-None': (8, 16),
3126
+ 'max_model_len-512-sw-None': (4, 8),
3127
+ 'max_model_len-8192-sw-None': (8, 16),
3128
+ },
3129
+ 'q_head-128_kv_head-8_head-128': {
3130
+ 'max_model_len-1024-sw-None': (8, 32),
3131
+ 'max_model_len-128-sw-None': (1, 8),
3132
+ 'max_model_len-2048-sw-None': (8, 16),
3133
+ 'max_model_len-256-sw-None': (2, 16),
3134
+ 'max_model_len-4096-sw-None': (8, 16),
3135
+ 'max_model_len-512-sw-None': (4, 16),
3136
+ 'max_model_len-8192-sw-None': (8, 16),
3137
+ },
3138
+ 'q_head-128_kv_head-8_head-256': {
3139
+ 'max_model_len-1024-sw-None': (4, 16),
3140
+ 'max_model_len-128-sw-None': (1, 8),
3141
+ 'max_model_len-2048-sw-None': (8, 16),
3142
+ 'max_model_len-256-sw-None': (2, 8),
3143
+ 'max_model_len-4096-sw-None': (8, 16),
3144
+ 'max_model_len-512-sw-None': (4, 16),
3145
+ 'max_model_len-8192-sw-None': (4, 16),
3146
+ },
3147
+ 'q_head-16_kv_head-1_head-128': {
3148
+ 'max_model_len-2048-sw-None': (8, 64),
3149
+ 512: (4, 64)
3150
+ },
3151
+ 'q_head-16_kv_head-1_head-256': {
3152
+ 'max_model_len-128-sw-None': (1, 32),
3153
+ 256: (2, 8)
3154
+ },
3155
+ 'q_head-16_kv_head-2_head-128': {
3156
+ 'max_model_len-128-sw-None': (1, 128),
3157
+ 'max_model_len-256-sw-None': (2, 8),
3158
+ 'max_model_len-512-sw-None': (2, 32),
3159
+ 'max_model_len-8192-sw-None': (16, 32),
3160
+ },
3161
+ 'q_head-16_kv_head-2_head-256': {
3162
+ 'max_model_len-128-sw-None': (1, 32),
3163
+ 'max_model_len-2048-sw-None': (8, 32),
3164
+ 'max_model_len-256-sw-None': (2, 32),
3165
+ },
3166
+ 'q_head-16_kv_head-4_head-128': {
3167
+ 'max_model_len-1024-sw-None': (8, 32),
3168
+ 'max_model_len-128-sw-None': (1, 64),
3169
+ 'max_model_len-256-sw-None': (2, 16),
3170
+ 'max_model_len-512-sw-None': (4, 64),
3171
+ },
3172
+ 'q_head-16_kv_head-4_head-256': {
3173
+ 'max_model_len-1024-sw-None': (8, 128),
3174
+ 'max_model_len-128-sw-None': (1, 16),
3175
+ 'max_model_len-2048-sw-None': (8, 64),
3176
+ 'max_model_len-256-sw-None': (2, 32),
3177
+ 'max_model_len-4096-sw-None': (8, 32),
3178
+ 'max_model_len-512-sw-None': (4, 32),
3179
+ 'max_model_len-8192-sw-None': (16, 64),
3180
+ },
3181
+ 'q_head-16_kv_head-8_head-128': {
3182
+ 'max_model_len-1024-sw-None': (8, 256),
3183
+ 'max_model_len-128-sw-None': (1, 128),
3184
+ 'max_model_len-2048-sw-None': (8, 128),
3185
+ 'max_model_len-256-sw-None': (2, 16),
3186
+ 'max_model_len-4096-sw-None': (8, 64),
3187
+ 'max_model_len-512-sw-None': (4, 64),
3188
+ 'max_model_len-8192-sw-None': (4, 128),
3189
+ },
3190
+ 'q_head-16_kv_head-8_head-256': {
3191
+ 'max_model_len-1024-sw-None': (8, 128),
3192
+ 'max_model_len-128-sw-None': (1, 16),
3193
+ 'max_model_len-2048-sw-None': (8, 128),
3194
+ 'max_model_len-256-sw-None': (2, 64),
3195
+ 'max_model_len-4096-sw-None': (8, 128),
3196
+ 'max_model_len-512-sw-None': (2, 32),
3197
+ 'max_model_len-8192-sw-None': (8, 128),
3198
+ },
3199
+ 'q_head-2_kv_head-1_head-128': {
3200
+ 'max_model_len-1024-sw-None': (8, 128),
3201
+ 'max_model_len-128-sw-None': (1, 256),
3202
+ 'max_model_len-2048-sw-None': (8, 32),
3203
+ 'max_model_len-256-sw-None': (2, 8),
3204
+ 'max_model_len-512-sw-None': (4, 256),
3205
+ 'max_model_len-8192-sw-None': (16, 32),
3206
+ },
3207
+ 'q_head-2_kv_head-1_head-256': {
3208
+ 'max_model_len-1024-sw-None': (8, 128),
3209
+ 'max_model_len-2048-sw-None': (8, 64),
3210
+ 'max_model_len-256-sw-None': (2, 8),
3211
+ 'max_model_len-4096-sw-None': (8, 128),
3212
+ 'max_model_len-512-sw-None': (4, 32),
3213
+ 'max_model_len-8192-sw-None': (16, 64),
3214
+ },
3215
+ 'q_head-32_kv_head-1_head-128': {
3216
+ 'max_model_len-1024-sw-None': (8, 16),
3217
+ 'max_model_len-128-sw-None': (1, 128),
3218
+ 'max_model_len-2048-sw-None': (8, 32),
3219
+ 'max_model_len-256-sw-None': (2, 16),
3220
+ 'max_model_len-4096-sw-None': (16, 64),
3221
+ 'max_model_len-512-sw-None': (4, 64),
3222
+ 'max_model_len-8192-sw-None': (16, 16),
3223
+ },
3224
+ 'q_head-32_kv_head-1_head-256': {
3225
+ 'max_model_len-1024-sw-None': (8, 16),
3226
+ 'max_model_len-128-sw-None': (1, 16),
3227
+ 'max_model_len-2048-sw-None': (16, 32),
3228
+ 'max_model_len-256-sw-None': (2, 8),
3229
+ 'max_model_len-4096-sw-None': (16, 16),
3230
+ 'max_model_len-512-sw-None': (4, 16),
3231
+ 'max_model_len-8192-sw-None': (16, 16),
3232
+ },
3233
+ 'q_head-32_kv_head-16_head-128': {
3234
+ 'max_model_len-1024-sw-None': (8, 64),
3235
+ 'max_model_len-128-sw-None': (1, 8),
3236
+ 'max_model_len-2048-sw-None': (8, 64),
3237
+ 'max_model_len-256-sw-None': (2, 32),
3238
+ 'max_model_len-4096-sw-None': (8, 64),
3239
+ 'max_model_len-512-sw-None': (4, 64),
3240
+ 'max_model_len-8192-sw-None': (8, 64),
3241
+ },
3242
+ 'q_head-32_kv_head-16_head-256': {
3243
+ 'max_model_len-1024-sw-None': (4, 32),
3244
+ 'max_model_len-128-sw-None': (1, 8),
3245
+ 'max_model_len-2048-sw-None': (4, 32),
3246
+ 'max_model_len-256-sw-None': (2, 32),
3247
+ 'max_model_len-4096-sw-None': (4, 32),
3248
+ 'max_model_len-512-sw-None': (4, 32),
3249
+ 'max_model_len-8192-sw-None': (4, 32),
3250
+ },
3251
+ 'q_head-32_kv_head-2_head-128': {
3252
+ 'max_model_len-1024-sw-None': (4, 8),
3253
+ 'max_model_len-128-sw-None': (1, 32),
3254
+ 'max_model_len-2048-sw-None': (8, 64),
3255
+ 'max_model_len-256-sw-None': (2, 8),
3256
+ 'max_model_len-4096-sw-None': (16, 32),
3257
+ 'max_model_len-512-sw-None': (4, 32),
3258
+ 'max_model_len-8192-sw-None': (16, 16),
3259
+ },
3260
+ 'q_head-32_kv_head-2_head-256': {
3261
+ 'max_model_len-1024-sw-None': (8, 16),
3262
+ 'max_model_len-128-sw-None': (1, 16),
3263
+ 'max_model_len-2048-sw-None': (8, 32),
3264
+ 'max_model_len-256-sw-None': (2, 16),
3265
+ 'max_model_len-4096-sw-None': (8, 32),
3266
+ 'max_model_len-512-sw-None': (4, 8),
3267
+ 'max_model_len-8192-sw-None': (8, 32),
3268
+ },
3269
+ 'q_head-32_kv_head-4_head-128': {
3270
+ 'max_model_len-1024-sw-None': (8, 64),
3271
+ 'max_model_len-128-sw-None': (1, 32),
3272
+ 'max_model_len-2048-sw-None': (8, 64),
3273
+ 'max_model_len-256-sw-None': (2, 16),
3274
+ 'max_model_len-4096-sw-None': (8, 32),
3275
+ 'max_model_len-512-sw-None': (4, 16),
3276
+ 'max_model_len-8192-sw-None': (8, 32),
3277
+ },
3278
+ 'q_head-32_kv_head-4_head-256': {
3279
+ 'max_model_len-1024-sw-None': (8, 32),
3280
+ 'max_model_len-128-sw-None': (1, 16),
3281
+ 'max_model_len-2048-sw-None': (8, 32),
3282
+ 'max_model_len-256-sw-None': (2, 32),
3283
+ 'max_model_len-4096-sw-None': (8, 32),
3284
+ 'max_model_len-512-sw-None': (4, 16),
3285
+ 'max_model_len-8192-sw-None': (8, 32),
3286
+ },
3287
+ 'q_head-32_kv_head-8_head-128': {
3288
+ 'max_model_len-1024-sw-None': (8, 128),
3289
+ 'max_model_len-128-sw-None': (1, 16),
3290
+ 'max_model_len-2048-sw-None': (4, 32),
3291
+ 'max_model_len-256-sw-None': (1, 16),
3292
+ 'max_model_len-4096-sw-None': (16, 32),
3293
+ 'max_model_len-512-sw-None': (4, 64),
3294
+ 'max_model_len-8192-sw-None': (4, 64),
3295
+ },
3296
+ 'q_head-32_kv_head-8_head-256': {
3297
+ 'max_model_len-1024-sw-None': (8, 32),
3298
+ 'max_model_len-128-sw-None': (1, 8),
3299
+ 'max_model_len-2048-sw-None': (4, 64),
3300
+ 'max_model_len-256-sw-None': (2, 16),
3301
+ 'max_model_len-4096-sw-None': (8, 64),
3302
+ 'max_model_len-512-sw-None': (4, 32),
3303
+ 'max_model_len-8192-sw-None': (8, 64),
3304
+ },
3305
+ 'q_head-4_kv_head-1_head-128': {
3306
+ 'max_model_len-1024-sw-None': (8, 32),
3307
+ 'max_model_len-2048-sw-None': (8, 128),
3308
+ 'max_model_len-256-sw-None': (1, 256),
3309
+ 'max_model_len-4096-sw-None': (16, 128),
3310
+ 'max_model_len-512-sw-None': (4, 128),
3311
+ 'max_model_len-8192-sw-None': (16, 16),
3312
+ },
3313
+ 'q_head-4_kv_head-1_head-256': {
3314
+ 'max_model_len-1024-sw-None': (8, 16),
3315
+ 'max_model_len-2048-sw-None': (8, 32),
3316
+ 'max_model_len-4096-sw-None': (16, 32),
3317
+ 'max_model_len-8192-sw-None': (16, 32),
3318
+ },
3319
+ 'q_head-4_kv_head-2_head-128': {
3320
+ 'max_model_len-1024-sw-None': (8, 64),
3321
+ 'max_model_len-128-sw-None': (1, 64),
3322
+ 'max_model_len-2048-sw-None': (8, 128),
3323
+ 'max_model_len-256-sw-None': (1, 256),
3324
+ 'max_model_len-4096-sw-None': (16, 128),
3325
+ 'max_model_len-8192-sw-None': (8, 32),
3326
+ },
3327
+ 'q_head-4_kv_head-2_head-256': {
3328
+ 'max_model_len-1024-sw-None': (8, 32),
3329
+ 'max_model_len-128-sw-None': (1, 8),
3330
+ 'max_model_len-4096-sw-None': (8, 256),
3331
+ 'max_model_len-8192-sw-None': (8, 128),
3332
+ },
3333
+ 'q_head-64_kv_head-1_head-128': {
3334
+ 'max_model_len-1024-sw-None': (4, 32),
3335
+ 'max_model_len-128-sw-None': (1, 16),
3336
+ 'max_model_len-2048-sw-None': (16, 32),
3337
+ 'max_model_len-256-sw-None': (2, 32),
3338
+ 'max_model_len-4096-sw-None': (16, 32),
3339
+ 'max_model_len-512-sw-None': (4, 16),
3340
+ 'max_model_len-8192-sw-None': (16, 32),
3341
+ },
3342
+ 'q_head-64_kv_head-1_head-256': {
3343
+ 'max_model_len-1024-sw-None': (8, 16),
3344
+ 'max_model_len-128-sw-None': (1, 8),
3345
+ 'max_model_len-2048-sw-None': (16, 8),
3346
+ 'max_model_len-256-sw-None': (2, 16),
3347
+ 'max_model_len-4096-sw-None': (16, 16),
3348
+ 'max_model_len-512-sw-None': (4, 16),
3349
+ 'max_model_len-8192-sw-None': (16, 16),
3350
+ },
3351
+ 'q_head-64_kv_head-16_head-128': {
3352
+ 'max_model_len-1024-sw-None': (4, 32),
3353
+ 'max_model_len-128-sw-None': (1, 16),
3354
+ 'max_model_len-2048-sw-None': (8, 32),
3355
+ 'max_model_len-256-sw-None': (2, 32),
3356
+ 'max_model_len-4096-sw-None': (8, 32),
3357
+ 'max_model_len-512-sw-None': (2, 32),
3358
+ 'max_model_len-8192-sw-None': (8, 32),
3359
+ },
3360
+ 'q_head-64_kv_head-16_head-256': {
3361
+ 'max_model_len-1024-sw-None': (4, 16),
3362
+ 'max_model_len-128-sw-None': (1, 16),
3363
+ 'max_model_len-2048-sw-None': (4, 16),
3364
+ 'max_model_len-256-sw-None': (2, 16),
3365
+ 'max_model_len-4096-sw-None': (4, 16),
3366
+ 'max_model_len-512-sw-None': (4, 16),
3367
+ 'max_model_len-8192-sw-None': (4, 16),
3368
+ },
3369
+ 'q_head-64_kv_head-2_head-128': {
3370
+ 'max_model_len-1024-sw-None': (8, 8),
3371
+ 'max_model_len-128-sw-None': (1, 16),
3372
+ 'max_model_len-2048-sw-None': (8, 16),
3373
+ 'max_model_len-256-sw-None': (1, 16),
3374
+ 'max_model_len-4096-sw-None': (8, 16),
3375
+ 'max_model_len-512-sw-None': (4, 16),
3376
+ 'max_model_len-8192-sw-None': (8, 32),
3377
+ },
3378
+ 'q_head-64_kv_head-2_head-256': {
3379
+ 'max_model_len-1024-sw-None': (4, 8),
3380
+ 'max_model_len-128-sw-None': (1, 8),
3381
+ 'max_model_len-2048-sw-None': (16, 16),
3382
+ 'max_model_len-256-sw-None': (2, 8),
3383
+ 'max_model_len-4096-sw-None': (8, 16),
3384
+ 'max_model_len-512-sw-None': (4, 8),
3385
+ 'max_model_len-8192-sw-None': (8, 16),
3386
+ },
3387
+ 'q_head-64_kv_head-4_head-128': {
3388
+ 'max_model_len-1024-sw-None': (8, 32),
3389
+ 'max_model_len-128-sw-None': (1, 8),
3390
+ 'max_model_len-2048-sw-None': (16, 16),
3391
+ 'max_model_len-256-sw-None': (1, 32),
3392
+ 'max_model_len-4096-sw-None': (8, 32),
3393
+ 'max_model_len-512-sw-None': (4, 32),
3394
+ 'max_model_len-8192-sw-None': (16, 32),
3395
+ },
3396
+ 'q_head-64_kv_head-4_head-256': {
3397
+ 'max_model_len-1024-sw-None': (4, 16),
3398
+ 'max_model_len-128-sw-None': (1, 8),
3399
+ 'max_model_len-2048-sw-None': (8, 32),
3400
+ 'max_model_len-256-sw-None': (1, 8),
3401
+ 'max_model_len-4096-sw-None': (8, 32),
3402
+ 'max_model_len-512-sw-None': (4, 16),
3403
+ 'max_model_len-8192-sw-None': (8, 32),
3404
+ },
3405
+ 'q_head-64_kv_head-8_head-128': {
3406
+ 'max_model_len-1024-sw-None': (8, 16),
3407
+ 'max_model_len-128-sw-None': (1, 32),
3408
+ 'max_model_len-2048-sw-None': (4, 32),
3409
+ 'max_model_len-256-sw-None': (2, 64),
3410
+ 'max_model_len-4096-sw-None': (4, 32),
3411
+ 'max_model_len-512-sw-None': (4, 32),
3412
+ 'max_model_len-8192-sw-None': (16, 32),
3413
+ },
3414
+ 'q_head-64_kv_head-8_head-256': {
3415
+ 'max_model_len-1024-sw-None': (8, 32),
3416
+ 'max_model_len-128-sw-None': (1, 8),
3417
+ 'max_model_len-2048-sw-None': (8, 32),
3418
+ 'max_model_len-256-sw-None': (2, 16),
3419
+ 'max_model_len-4096-sw-None': (4, 32),
3420
+ 'max_model_len-512-sw-None': (4, 16),
3421
+ 'max_model_len-8192-sw-None': (8, 32),
3422
+ },
3423
+ 'q_head-8_kv_head-1_head-128': {
3424
+ 'max_model_len-2048-sw-None': (8, 32),
3425
+ 'max_model_len-4096-sw-None': (8, 16),
3426
+ 'max_model_len-512-sw-None': (4, 128),
3427
+ 'max_model_len-8192-sw-None': (16, 32),
3428
+ },
3429
+ 'q_head-8_kv_head-1_head-256': {
3430
+ 'max_model_len-128-sw-None': (1, 8),
3431
+ 'max_model_len-2048-sw-None': (8, 16),
3432
+ 'max_model_len-8192-sw-None': (8, 32),
3433
+ },
3434
+ 'q_head-8_kv_head-2_head-128': {
3435
+ 'max_model_len-128-sw-None': (1, 64),
3436
+ 'max_model_len-256-sw-None': (2, 64),
3437
+ 'max_model_len-4096-sw-None': (16, 32),
3438
+ 'max_model_len-512-sw-None': (4, 64),
3439
+ 'max_model_len-8192-sw-None': (16, 128),
3440
+ },
3441
+ 'q_head-8_kv_head-2_head-256': {
3442
+ 'max_model_len-1024-sw-None': (8, 128),
3443
+ 'max_model_len-128-sw-None': (1, 32),
3444
+ 'max_model_len-8192-sw-None': (8, 128),
3445
+ },
3446
+ 'q_head-8_kv_head-4_head-128': {
3447
+ 'max_model_len-128-sw-None': (1, 16),
3448
+ 'max_model_len-256-sw-None': (2, 32),
3449
+ 'max_model_len-4096-sw-None': (16, 32),
3450
+ 'max_model_len-512-sw-None': (4, 8),
3451
+ },
3452
+ 'q_head-8_kv_head-4_head-256': {
3453
+ 'max_model_len-128-sw-None': (1, 32),
3454
+ 'max_model_len-2048-sw-None': (8, 128),
3455
+ 'max_model_len-256-sw-None': (2, 32),
3456
+ 'max_model_len-512-sw-None': (4, 16),
3457
+ },
3458
+ }
3459
+ },
3460
+ 256: {
3461
+ 'q_bfloat16_kv_bfloat16': {
3462
+ 'q_head-128_kv_head-1_head-128': {
3463
+ 'max_model_len-1024-sw-None': (2, 16),
3464
+ 'max_model_len-2048-sw-None': (4, 8),
3465
+ 'max_model_len-256-sw-None': (1, 8),
3466
+ 'max_model_len-4096-sw-None': (8, 8),
3467
+ 'max_model_len-512-sw-None': (2, 8),
3468
+ 'max_model_len-8192-sw-None': (8, 16),
3469
+ },
3470
+ 'q_head-128_kv_head-1_head-256': {
3471
+ 'max_model_len-1024-sw-None': (4, 8),
3472
+ 'max_model_len-2048-sw-None': (4, 8),
3473
+ 'max_model_len-256-sw-None': (1, 8),
3474
+ 'max_model_len-4096-sw-None': (8, 8),
3475
+ 'max_model_len-512-sw-None': (2, 8),
3476
+ 'max_model_len-8192-sw-None': (8, 8),
3477
+ },
3478
+ 'q_head-128_kv_head-16_head-128': {
3479
+ 'max_model_len-1024-sw-None': (4, 16),
3480
+ 'max_model_len-2048-sw-None': (4, 16),
3481
+ 'max_model_len-256-sw-None': (1, 16),
3482
+ 'max_model_len-4096-sw-None': (4, 16),
3483
+ 'max_model_len-512-sw-None': (2, 16),
3484
+ 'max_model_len-8192-sw-None': (4, 16),
3485
+ },
3486
+ 'q_head-128_kv_head-16_head-256': {
3487
+ 'max_model_len-1024-sw-None': (2, 8),
3488
+ 'max_model_len-2048-sw-None': (2, 8),
3489
+ 'max_model_len-256-sw-None': (1, 8),
3490
+ 'max_model_len-4096-sw-None': (2, 8),
3491
+ 'max_model_len-512-sw-None': (2, 8),
3492
+ 'max_model_len-8192-sw-None': (2, 8),
3493
+ },
3494
+ 'q_head-128_kv_head-2_head-128': {
3495
+ 'max_model_len-1024-sw-None': (4, 8),
3496
+ 'max_model_len-2048-sw-None': (8, 8),
3497
+ 'max_model_len-256-sw-None': (1, 16),
3498
+ 'max_model_len-4096-sw-None': (8, 8),
3499
+ 'max_model_len-512-sw-None': (2, 8),
3500
+ 'max_model_len-8192-sw-None': (8, 16),
3501
+ },
3502
+ 'q_head-128_kv_head-2_head-256': {
3503
+ 'max_model_len-1024-sw-None': (4, 8),
3504
+ 'max_model_len-2048-sw-None': (4, 8),
3505
+ 'max_model_len-256-sw-None': (1, 8),
3506
+ 'max_model_len-4096-sw-None': (8, 8),
3507
+ 'max_model_len-512-sw-None': (1, 8),
3508
+ 'max_model_len-8192-sw-None': (8, 8),
3509
+ },
3510
+ 'q_head-128_kv_head-4_head-128': {
3511
+ 'max_model_len-1024-sw-None': (4, 16),
3512
+ 'max_model_len-2048-sw-None': (4, 16),
3513
+ 'max_model_len-256-sw-None': (1, 32),
3514
+ 'max_model_len-4096-sw-None': (8, 16),
3515
+ 'max_model_len-512-sw-None': (2, 32),
3516
+ 'max_model_len-8192-sw-None': (4, 16),
3517
+ },
3518
+ 'q_head-128_kv_head-4_head-256': {
3519
+ 'max_model_len-1024-sw-None': (2, 8),
3520
+ 'max_model_len-2048-sw-None': (4, 16),
3521
+ 'max_model_len-256-sw-None': (1, 8),
3522
+ 'max_model_len-4096-sw-None': (8, 8),
3523
+ 'max_model_len-512-sw-None': (2, 8),
3524
+ 'max_model_len-8192-sw-None': (4, 16),
3525
+ },
3526
+ 'q_head-128_kv_head-8_head-128': {
3527
+ 'max_model_len-1024-sw-None': (4, 16),
3528
+ 'max_model_len-2048-sw-None': (4, 32),
3529
+ 'max_model_len-256-sw-None': (1, 32),
3530
+ 'max_model_len-4096-sw-None': (4, 32),
3531
+ 'max_model_len-512-sw-None': (2, 16),
3532
+ 'max_model_len-8192-sw-None': (2, 32),
3533
+ },
3534
+ 'q_head-128_kv_head-8_head-256': {
3535
+ 'max_model_len-1024-sw-None': (4, 16),
3536
+ 'max_model_len-2048-sw-None': (2, 16),
3537
+ 'max_model_len-256-sw-None': (1, 8),
3538
+ 'max_model_len-4096-sw-None': (2, 16),
3539
+ 'max_model_len-512-sw-None': (2, 16),
3540
+ 'max_model_len-8192-sw-None': (2, 16),
3541
+ },
3542
+ 'q_head-16_kv_head-1_head-128': {
3543
+ 'max_model_len-1024-sw-None': (2, 32),
3544
+ 'max_model_len-2048-sw-None': (8, 16),
3545
+ 'max_model_len-256-sw-None': (1, 32),
3546
+ 'max_model_len-4096-sw-None': (8, 32),
3547
+ 'max_model_len-512-sw-None': (1, 64),
3548
+ 'max_model_len-8192-sw-None': (8, 32),
3549
+ },
3550
+ 'q_head-16_kv_head-1_head-256': {
3551
+ 'max_model_len-1024-sw-None': (4, 32),
3552
+ 'max_model_len-2048-sw-None': (4, 16),
3553
+ 'max_model_len-256-sw-None': (1, 32),
3554
+ 'max_model_len-4096-sw-None': (8, 16),
3555
+ 'max_model_len-512-sw-None': (2, 8),
3556
+ 'max_model_len-8192-sw-None': (8, 16),
3557
+ },
3558
+ 'q_head-16_kv_head-2_head-128': {
3559
+ 'max_model_len-1024-sw-None': (4, 16),
3560
+ 'max_model_len-2048-sw-None': (4, 32),
3561
+ 'max_model_len-256-sw-None': (1, 8),
3562
+ 'max_model_len-4096-sw-None': (4, 64),
3563
+ 'max_model_len-512-sw-None': (2, 16),
3564
+ 'max_model_len-8192-sw-None': (8, 128),
3565
+ },
3566
+ 'q_head-16_kv_head-2_head-256': {
3567
+ 'max_model_len-1024-sw-None': (4, 32),
3568
+ 'max_model_len-2048-sw-None': (4, 16),
3569
+ 'max_model_len-256-sw-None': (1, 64),
3570
+ 'max_model_len-4096-sw-None': (8, 32),
3571
+ 'max_model_len-512-sw-None': (2, 16),
3572
+ 'max_model_len-8192-sw-None': (4, 32),
3573
+ },
3574
+ 'q_head-16_kv_head-4_head-128': {
3575
+ 'max_model_len-1024-sw-None': (2, 64),
3576
+ 'max_model_len-2048-sw-None': (2, 64),
3577
+ 'max_model_len-256-sw-None': (1, 64),
3578
+ 'max_model_len-4096-sw-None': (4, 32),
3579
+ 'max_model_len-512-sw-None': (2, 128),
3580
+ 'max_model_len-8192-sw-None': (8, 32),
3581
+ },
3582
+ 'q_head-16_kv_head-4_head-256': {
3583
+ 'max_model_len-1024-sw-None': (2, 64),
3584
+ 'max_model_len-2048-sw-None': (8, 32),
3585
+ 'max_model_len-256-sw-None': (1, 32),
3586
+ 'max_model_len-4096-sw-None': (4, 128),
3587
+ 'max_model_len-512-sw-None': (2, 16),
3588
+ 'max_model_len-8192-sw-None': (4, 32),
3589
+ },
3590
+ 'q_head-16_kv_head-8_head-128': {
3591
+ 'max_model_len-1024-sw-None': (4, 64),
3592
+ 'max_model_len-2048-sw-None': (4, 32),
3593
+ 'max_model_len-256-sw-None': (1, 8),
3594
+ 'max_model_len-4096-sw-None': (2, 128),
3595
+ 'max_model_len-512-sw-None': (2, 64),
3596
+ 'max_model_len-8192-sw-None': (8, 128),
3597
+ },
3598
+ 'q_head-16_kv_head-8_head-256': {
3599
+ 'max_model_len-1024-sw-None': (4, 64),
3600
+ 'max_model_len-2048-sw-None': (4, 128),
3601
+ 'max_model_len-256-sw-None': (1, 16),
3602
+ 'max_model_len-4096-sw-None': (4, 128),
3603
+ 'max_model_len-512-sw-None': (1, 32),
3604
+ 'max_model_len-8192-sw-None': (4, 128),
3605
+ },
3606
+ 'q_head-2_kv_head-1_head-128': {
3607
+ 'max_model_len-1024-sw-None': (4, 64),
3608
+ 'max_model_len-2048-sw-None': (8, 128),
3609
+ 'max_model_len-256-sw-None': (1, 64),
3610
+ 'max_model_len-4096-sw-None': (8, 256),
3611
+ 'max_model_len-512-sw-None': (2, 64),
3612
+ 'max_model_len-8192-sw-None': (8, 256),
3613
+ },
3614
+ 'q_head-2_kv_head-1_head-256': {
3615
+ 'max_model_len-1024-sw-None': (4, 128),
3616
+ 'max_model_len-2048-sw-None': (8, 32),
3617
+ 'max_model_len-256-sw-None': (1, 32),
3618
+ 'max_model_len-4096-sw-None': (8, 256),
3619
+ 'max_model_len-512-sw-None': (2, 32),
3620
+ 'max_model_len-8192-sw-None': (4, 32),
3621
+ },
3622
+ 'q_head-32_kv_head-1_head-128': {
3623
+ 'max_model_len-1024-sw-None': (2, 32),
3624
+ 'max_model_len-2048-sw-None': (4, 16),
3625
+ 'max_model_len-256-sw-None': (1, 64),
3626
+ 'max_model_len-4096-sw-None': (8, 16),
3627
+ 'max_model_len-512-sw-None': (2, 32),
3628
+ 'max_model_len-8192-sw-None': (8, 64),
3629
+ },
3630
+ 'q_head-32_kv_head-1_head-256': {
3631
+ 'max_model_len-1024-sw-None': (4, 8),
3632
+ 'max_model_len-2048-sw-None': (8, 16),
3633
+ 'max_model_len-256-sw-None': (1, 16),
3634
+ 'max_model_len-4096-sw-None': (8, 16),
3635
+ 'max_model_len-512-sw-None': (2, 16),
3636
+ 'max_model_len-8192-sw-None': (8, 16),
3637
+ },
3638
+ 'q_head-32_kv_head-16_head-128': {
3639
+ 'max_model_len-1024-sw-None': (4, 64),
3640
+ 'max_model_len-2048-sw-None': (4, 64),
3641
+ 'max_model_len-256-sw-None': (1, 64),
3642
+ 'max_model_len-4096-sw-None': (4, 64),
3643
+ 'max_model_len-512-sw-None': (2, 32),
3644
+ 'max_model_len-8192-sw-None': (4, 64),
3645
+ },
3646
+ 'q_head-32_kv_head-16_head-256': {
3647
+ 'max_model_len-1024-sw-None': (2, 32),
3648
+ 'max_model_len-2048-sw-None': (2, 32),
3649
+ 'max_model_len-256-sw-None': (1, 32),
3650
+ 'max_model_len-4096-sw-None': (2, 32),
3651
+ 'max_model_len-512-sw-None': (2, 32),
3652
+ 'max_model_len-8192-sw-None': (2, 32),
3653
+ },
3654
+ 'q_head-32_kv_head-2_head-128': {
3655
+ 'max_model_len-1024-sw-None': (4, 16),
3656
+ 'max_model_len-2048-sw-None': (8, 16),
3657
+ 'max_model_len-256-sw-None': (1, 8),
3658
+ 'max_model_len-4096-sw-None': (4, 32),
3659
+ 'max_model_len-512-sw-None': (2, 16),
3660
+ 'max_model_len-8192-sw-None': (8, 32),
3661
+ },
3662
+ 'q_head-32_kv_head-2_head-256': {
3663
+ 'max_model_len-1024-sw-None': (2, 16),
3664
+ 'max_model_len-2048-sw-None': (8, 16),
3665
+ 'max_model_len-256-sw-None': (1, 32),
3666
+ 'max_model_len-4096-sw-None': (8, 16),
3667
+ 'max_model_len-512-sw-None': (2, 16),
3668
+ 'max_model_len-8192-sw-None': (8, 32),
3669
+ },
3670
+ 'q_head-32_kv_head-4_head-128': {
3671
+ 'max_model_len-1024-sw-None': (4, 64),
3672
+ 'max_model_len-2048-sw-None': (8, 32),
3673
+ 'max_model_len-256-sw-None': (1, 16),
3674
+ 'max_model_len-4096-sw-None': (4, 128),
3675
+ 'max_model_len-512-sw-None': (2, 16),
3676
+ 'max_model_len-8192-sw-None': (4, 128),
3677
+ },
3678
+ 'q_head-32_kv_head-4_head-256': {
3679
+ 'max_model_len-1024-sw-None': (4, 16),
3680
+ 'max_model_len-2048-sw-None': (2, 32),
3681
+ 'max_model_len-256-sw-None': (1, 32),
3682
+ 'max_model_len-4096-sw-None': (8, 32),
3683
+ 'max_model_len-512-sw-None': (2, 32),
3684
+ 'max_model_len-8192-sw-None': (4, 32),
3685
+ },
3686
+ 'q_head-32_kv_head-8_head-128': {
3687
+ 'max_model_len-1024-sw-None': (4, 128),
3688
+ 'max_model_len-2048-sw-None': (4, 128),
3689
+ 'max_model_len-256-sw-None': (1, 32),
3690
+ 'max_model_len-4096-sw-None': (4, 128),
3691
+ 'max_model_len-512-sw-None': (2, 16),
3692
+ 'max_model_len-8192-sw-None': (2, 64),
3693
+ },
3694
+ 'q_head-32_kv_head-8_head-256': {
3695
+ 'max_model_len-1024-sw-None': (2, 64),
3696
+ 'max_model_len-2048-sw-None': (2, 32),
3697
+ 'max_model_len-256-sw-None': (1, 16),
3698
+ 'max_model_len-4096-sw-None': (4, 64),
3699
+ 'max_model_len-512-sw-None': (1, 32),
3700
+ 'max_model_len-8192-sw-None': (4, 64),
3701
+ },
3702
+ 'q_head-4_kv_head-1_head-128': {
3703
+ 'max_model_len-1024-sw-None': (4, 16),
3704
+ 'max_model_len-2048-sw-None': (8, 16),
3705
+ 'max_model_len-256-sw-None': (1, 128),
3706
+ 'max_model_len-4096-sw-None': (4, 128),
3707
+ 'max_model_len-512-sw-None': (2, 128),
3708
+ 'max_model_len-8192-sw-None': (8, 32),
3709
+ },
3710
+ 'q_head-4_kv_head-1_head-256': {
3711
+ 'max_model_len-1024-sw-None': (4, 16),
3712
+ 'max_model_len-2048-sw-None': (4, 32),
3713
+ 'max_model_len-256-sw-None': (1, 64),
3714
+ 'max_model_len-4096-sw-None': (8, 64),
3715
+ 'max_model_len-512-sw-None': (2, 64),
3716
+ 'max_model_len-8192-sw-None': (4, 64),
3717
+ },
3718
+ 'q_head-4_kv_head-2_head-128': {
3719
+ 'max_model_len-1024-sw-None': (4, 256),
3720
+ 'max_model_len-2048-sw-None': (8, 128),
3721
+ 'max_model_len-256-sw-None': (1, 64),
3722
+ 'max_model_len-4096-sw-None': (8, 256),
3723
+ 'max_model_len-512-sw-None': (1, 64),
3724
+ 'max_model_len-8192-sw-None': (8, 128),
3725
+ },
3726
+ 'q_head-4_kv_head-2_head-256': {
3727
+ 'max_model_len-1024-sw-None': (4, 32),
3728
+ 'max_model_len-2048-sw-None': (4, 32),
3729
+ 'max_model_len-256-sw-None': (1, 8),
3730
+ 'max_model_len-4096-sw-None': (8, 64),
3731
+ 'max_model_len-512-sw-None': (2, 64),
3732
+ 'max_model_len-8192-sw-None': (4, 64),
3733
+ },
3734
+ 'q_head-64_kv_head-1_head-128': {
3735
+ 'max_model_len-1024-sw-None': (2, 8),
3736
+ 'max_model_len-2048-sw-None': (8, 16),
3737
+ 'max_model_len-256-sw-None': (1, 32),
3738
+ 'max_model_len-4096-sw-None': (8, 16),
3739
+ 'max_model_len-512-sw-None': (2, 16),
3740
+ 'max_model_len-8192-sw-None': (8, 8),
3741
+ },
3742
+ 'q_head-64_kv_head-1_head-256': {
3743
+ 'max_model_len-1024-sw-None': (4, 8),
3744
+ 'max_model_len-2048-sw-None': (8, 8),
3745
+ 'max_model_len-256-sw-None': (1, 8),
3746
+ 'max_model_len-4096-sw-None': (4, 8),
3747
+ 'max_model_len-512-sw-None': (1, 16),
3748
+ 'max_model_len-8192-sw-None': (8, 16),
3749
+ },
3750
+ 'q_head-64_kv_head-16_head-128': {
3751
+ 'max_model_len-1024-sw-None': (2, 32),
3752
+ 'max_model_len-2048-sw-None': (4, 32),
3753
+ 'max_model_len-256-sw-None': (1, 16),
3754
+ 'max_model_len-4096-sw-None': (2, 32),
3755
+ 'max_model_len-512-sw-None': (2, 32),
3756
+ 'max_model_len-8192-sw-None': (4, 32),
3757
+ },
3758
+ 'q_head-64_kv_head-16_head-256': {
3759
+ 'max_model_len-1024-sw-None': (2, 16),
3760
+ 'max_model_len-2048-sw-None': (2, 16),
3761
+ 'max_model_len-256-sw-None': (1, 16),
3762
+ 'max_model_len-4096-sw-None': (2, 16),
3763
+ 'max_model_len-512-sw-None': (2, 16),
3764
+ 'max_model_len-8192-sw-None': (2, 16),
3765
+ },
3766
+ 'q_head-64_kv_head-2_head-128': {
3767
+ 'max_model_len-1024-sw-None': (4, 16),
3768
+ 'max_model_len-2048-sw-None': (8, 16),
3769
+ 'max_model_len-256-sw-None': (1, 8),
3770
+ 'max_model_len-4096-sw-None': (8, 16),
3771
+ 'max_model_len-512-sw-None': (2, 32),
3772
+ 'max_model_len-8192-sw-None': (8, 16),
3773
+ },
3774
+ 'q_head-64_kv_head-2_head-256': {
3775
+ 'max_model_len-1024-sw-None': (2, 8),
3776
+ 'max_model_len-2048-sw-None': (4, 16),
3777
+ 'max_model_len-256-sw-None': (1, 16),
3778
+ 'max_model_len-4096-sw-None': (4, 16),
3779
+ 'max_model_len-512-sw-None': (2, 8),
3780
+ 'max_model_len-8192-sw-None': (4, 32),
3781
+ },
3782
+ 'q_head-64_kv_head-4_head-128': {
3783
+ 'max_model_len-1024-sw-None': (4, 16),
3784
+ 'max_model_len-2048-sw-None': (8, 32),
3785
+ 'max_model_len-256-sw-None': (1, 32),
3786
+ 'max_model_len-4096-sw-None': (8, 32),
3787
+ 'max_model_len-512-sw-None': (2, 64),
3788
+ 'max_model_len-8192-sw-None': (4, 32),
3789
+ },
3790
+ 'q_head-64_kv_head-4_head-256': {
3791
+ 'max_model_len-1024-sw-None': (4, 32),
3792
+ 'max_model_len-2048-sw-None': (8, 16),
3793
+ 'max_model_len-256-sw-None': (1, 16),
3794
+ 'max_model_len-4096-sw-None': (4, 16),
3795
+ 'max_model_len-512-sw-None': (2, 16),
3796
+ 'max_model_len-8192-sw-None': (4, 32),
3797
+ },
3798
+ 'q_head-64_kv_head-8_head-128': {
3799
+ 'max_model_len-1024-sw-None': (4, 16),
3800
+ 'max_model_len-2048-sw-None': (2, 32),
3801
+ 'max_model_len-256-sw-None': (1, 8),
3802
+ 'max_model_len-4096-sw-None': (8, 32),
3803
+ 'max_model_len-512-sw-None': (2, 64),
3804
+ 'max_model_len-8192-sw-None': (4, 32),
3805
+ },
3806
+ 'q_head-64_kv_head-8_head-256': {
3807
+ 'max_model_len-1024-sw-None': (4, 32),
3808
+ 'max_model_len-2048-sw-None': (4, 32),
3809
+ 'max_model_len-256-sw-None': (1, 8),
3810
+ 'max_model_len-4096-sw-None': (4, 32),
3811
+ 'max_model_len-512-sw-None': (2, 16),
3812
+ 'max_model_len-8192-sw-None': (4, 32),
3813
+ },
3814
+ 'q_head-8_kv_head-1_head-128': {
3815
+ 'max_model_len-1024-sw-None': (4, 8),
3816
+ 'max_model_len-2048-sw-None': (8, 64),
3817
+ 'max_model_len-256-sw-None': (1, 32),
3818
+ 'max_model_len-4096-sw-None': (8, 64),
3819
+ 'max_model_len-512-sw-None': (2, 32),
3820
+ 'max_model_len-8192-sw-None': (8, 32),
3821
+ },
3822
+ 'q_head-8_kv_head-1_head-256': {
3823
+ 'max_model_len-1024-sw-None': (2, 16),
3824
+ 'max_model_len-2048-sw-None': (8, 8),
3825
+ 'max_model_len-256-sw-None': (1, 64),
3826
+ 'max_model_len-4096-sw-None': (8, 64),
3827
+ 'max_model_len-512-sw-None': (2, 16),
3828
+ 'max_model_len-8192-sw-None': (8, 64),
3829
+ },
3830
+ 'q_head-8_kv_head-2_head-128': {
3831
+ 'max_model_len-1024-sw-None': (4, 64),
3832
+ 'max_model_len-2048-sw-None': (8, 16),
3833
+ 'max_model_len-256-sw-None': (1, 16),
3834
+ 'max_model_len-4096-sw-None': (8, 32),
3835
+ 'max_model_len-512-sw-None': (2, 128),
3836
+ 'max_model_len-8192-sw-None': (8, 32),
3837
+ },
3838
+ 'q_head-8_kv_head-2_head-256': {
3839
+ 'max_model_len-1024-sw-None': (2, 32),
3840
+ 'max_model_len-2048-sw-None': (2, 32),
3841
+ 'max_model_len-256-sw-None': (1, 32),
3842
+ 'max_model_len-4096-sw-None': (4, 64),
3843
+ 'max_model_len-512-sw-None': (2, 16),
3844
+ 'max_model_len-8192-sw-None': (4, 64),
3845
+ },
3846
+ 'q_head-8_kv_head-4_head-128': {
3847
+ 'max_model_len-1024-sw-None': (4, 256),
3848
+ 'max_model_len-2048-sw-None': (4, 32),
3849
+ 'max_model_len-256-sw-None': (1, 64),
3850
+ 'max_model_len-4096-sw-None': (8, 64),
3851
+ 'max_model_len-512-sw-None': (2, 64),
3852
+ 'max_model_len-8192-sw-None': (4, 64),
3853
+ },
3854
+ 'q_head-8_kv_head-4_head-256': {
3855
+ 'max_model_len-1024-sw-None': (4, 64),
3856
+ 'max_model_len-2048-sw-None': (4, 64),
3857
+ 'max_model_len-256-sw-None': (1, 64),
3858
+ 'max_model_len-4096-sw-None': (4, 128),
3859
+ 'max_model_len-512-sw-None': (2, 64),
3860
+ 'max_model_len-8192-sw-None': (4, 128),
3861
+ },
3862
+ }
3863
+ },
3864
+ 64: {
3865
+ 'q_bfloat16_kv_bfloat16': {
3866
+ 'q_head-128_kv_head-1_head-128': {
3867
+ 'max_model_len-1024-sw-None': (8, 16),
3868
+ 'max_model_len-128-sw-None': (2, 16),
3869
+ 'max_model_len-2048-sw-None': (16, 16),
3870
+ 'max_model_len-256-sw-None': (4, 8),
3871
+ 'max_model_len-512-sw-None': (4, 16),
3872
+ 'max_model_len-64-sw-None': (1, 8),
3873
+ },
3874
+ 'q_head-128_kv_head-1_head-256': {
3875
+ 'max_model_len-1024-sw-None': (16, 8),
3876
+ 'max_model_len-2048-sw-None': (32, 8),
3877
+ 'max_model_len-256-sw-None': (2, 8),
3878
+ 'max_model_len-512-sw-None': (8, 8),
3879
+ 'max_model_len-64-sw-None': (1, 8),
3880
+ 'max_model_len-8192-sw-None': (32, 8),
3881
+ },
3882
+ 'q_head-128_kv_head-16_head-128': {
3883
+ 'max_model_len-1024-sw-None': (16, 16),
3884
+ 'max_model_len-128-sw-None': (2, 16),
3885
+ 'max_model_len-256-sw-None': (2, 8),
3886
+ 'max_model_len-512-sw-None': (8, 16),
3887
+ 'max_model_len-64-sw-None': (1, 8),
3888
+ },
3889
+ 'q_head-128_kv_head-16_head-256': {
3890
+ 'max_model_len-128-sw-None': (2, 8),
3891
+ 'max_model_len-256-sw-None': (4, 8),
3892
+ 'max_model_len-4096-sw-None': (8, 8),
3893
+ 'max_model_len-512-sw-None': (8, 8),
3894
+ 'max_model_len-64-sw-None': (1, 8),
3895
+ },
3896
+ 'q_head-128_kv_head-2_head-128': {
3897
+ 'max_model_len-1024-sw-None': (16, 16),
3898
+ 'max_model_len-2048-sw-None': (16, 8),
3899
+ 'max_model_len-256-sw-None': (4, 8),
3900
+ 'max_model_len-4096-sw-None': (16, 16),
3901
+ 'max_model_len-512-sw-None': (8, 16),
3902
+ 'max_model_len-64-sw-None': (1, 8),
3903
+ 'max_model_len-8192-sw-None': (32, 16),
3904
+ },
3905
+ 'q_head-128_kv_head-2_head-256': {
3906
+ 'max_model_len-1024-sw-None': (16, 8),
3907
+ 'max_model_len-2048-sw-None': (16, 8),
3908
+ 'max_model_len-256-sw-None': (4, 8),
3909
+ 'max_model_len-4096-sw-None': (32, 8),
3910
+ },
3911
+ 'q_head-128_kv_head-4_head-128': {
3912
+ 'max_model_len-1024-sw-None': (16, 8),
3913
+ 'max_model_len-128-sw-None': (1, 8),
3914
+ 'max_model_len-2048-sw-None': (16, 8),
3915
+ 'max_model_len-4096-sw-None': (16, 16),
3916
+ 'max_model_len-512-sw-None': (8, 32),
3917
+ 'max_model_len-64-sw-None': (1, 32),
3918
+ 'max_model_len-8192-sw-None': (16, 32),
3919
+ },
3920
+ 'q_head-128_kv_head-4_head-256': {
3921
+ 'max_model_len-1024-sw-None': (8, 8),
3922
+ 'max_model_len-128-sw-None': (2, 8),
3923
+ 'max_model_len-2048-sw-None': (16, 8),
3924
+ 'max_model_len-256-sw-None': (4, 8),
3925
+ 'max_model_len-4096-sw-None': (32, 32),
3926
+ 'max_model_len-64-sw-None': (1, 8),
3927
+ 'max_model_len-8192-sw-None': (32, 32),
3928
+ },
3929
+ 'q_head-128_kv_head-8_head-128': {
3930
+ 'max_model_len-1024-sw-None': (8, 16),
3931
+ 'max_model_len-4096-sw-None': (8, 16),
3932
+ 'max_model_len-64-sw-None': (1, 8),
3933
+ 'max_model_len-8192-sw-None': (8, 32),
3934
+ },
3935
+ 'q_head-128_kv_head-8_head-256': {
3936
+ 'max_model_len-128-sw-None': (2, 8),
3937
+ 'max_model_len-256-sw-None': (4, 8),
3938
+ 'max_model_len-4096-sw-None': (16, 16),
3939
+ 'max_model_len-64-sw-None': (1, 8),
3940
+ 'max_model_len-8192-sw-None': (8, 16),
3941
+ },
3942
+ 'q_head-16_kv_head-1_head-128': {
3943
+ 'max_model_len-1024-sw-None': (16, 8),
3944
+ 'max_model_len-128-sw-None': (2, 16),
3945
+ 'max_model_len-2048-sw-None': (16, 64),
3946
+ 'max_model_len-256-sw-None': (4, 8),
3947
+ 'max_model_len-4096-sw-None': (32, 64),
3948
+ 'max_model_len-512-sw-None': (8, 16),
3949
+ 'max_model_len-64-sw-None': (1, 128),
3950
+ 'max_model_len-8192-sw-None': (32, 128),
3951
+ },
3952
+ 'q_head-16_kv_head-1_head-256': {
3953
+ 'max_model_len-1024-sw-None': (8, 16),
3954
+ 'max_model_len-128-sw-None': (2, 32),
3955
+ 'max_model_len-2048-sw-None': (32, 8),
3956
+ 'max_model_len-256-sw-None': (4, 64),
3957
+ 'max_model_len-4096-sw-None': (32, 16),
3958
+ 'max_model_len-512-sw-None': (8, 8),
3959
+ 'max_model_len-64-sw-None': (1, 16),
3960
+ 'max_model_len-8192-sw-None': (32, 16),
3961
+ },
3962
+ 'q_head-16_kv_head-2_head-128': {
3963
+ 'max_model_len-1024-sw-None': (16, 16),
3964
+ 'max_model_len-128-sw-None': (2, 64),
3965
+ 'max_model_len-2048-sw-None': (16, 16),
3966
+ 'max_model_len-256-sw-None': (4, 128),
3967
+ 'max_model_len-4096-sw-None': (32, 32),
3968
+ 'max_model_len-512-sw-None': (8, 64),
3969
+ 'max_model_len-64-sw-None': (1, 16),
3970
+ 'max_model_len-8192-sw-None': (32, 64),
3971
+ },
3972
+ 'q_head-16_kv_head-2_head-256': {
3973
+ 'max_model_len-1024-sw-None': (16, 16),
3974
+ 'max_model_len-128-sw-None': (2, 8),
3975
+ 'max_model_len-2048-sw-None': (16, 32),
3976
+ 'max_model_len-256-sw-None': (4, 8),
3977
+ 'max_model_len-4096-sw-None': (8, 32),
3978
+ 'max_model_len-512-sw-None': (8, 16),
3979
+ 'max_model_len-64-sw-None': (1, 8),
3980
+ 'max_model_len-8192-sw-None': (32, 32),
3981
+ },
3982
+ 'q_head-16_kv_head-4_head-128': {
3983
+ 'max_model_len-1024-sw-None': (8, 64),
3984
+ 'max_model_len-128-sw-None': (2, 32),
3985
+ 'max_model_len-2048-sw-None': (16, 32),
3986
+ 'max_model_len-256-sw-None': (4, 128),
3987
+ 'max_model_len-4096-sw-None': (16, 32),
3988
+ 'max_model_len-512-sw-None': (4, 128),
3989
+ 'max_model_len-64-sw-None': (1, 16),
3990
+ 'max_model_len-8192-sw-None': (16, 128),
3991
+ },
3992
+ 'q_head-16_kv_head-4_head-256': {
3993
+ 'max_model_len-1024-sw-None': (16, 32),
3994
+ 'max_model_len-128-sw-None': (2, 32),
3995
+ 'max_model_len-2048-sw-None': (16, 128),
3996
+ 'max_model_len-256-sw-None': (4, 32),
3997
+ 'max_model_len-4096-sw-None': (16, 128),
3998
+ 'max_model_len-512-sw-None': (4, 32),
3999
+ 'max_model_len-64-sw-None': (1, 8),
4000
+ 'max_model_len-8192-sw-None': (16, 32),
4001
+ },
4002
+ 'q_head-16_kv_head-8_head-128': {
4003
+ 'max_model_len-1024-sw-None': (8, 64),
4004
+ 'max_model_len-128-sw-None': (2, 32),
4005
+ 'max_model_len-2048-sw-None': (8, 64),
4006
+ 'max_model_len-256-sw-None': (4, 64),
4007
+ 'max_model_len-4096-sw-None': (32, 64),
4008
+ 'max_model_len-512-sw-None': (8, 8),
4009
+ 'max_model_len-64-sw-None': (1, 16),
4010
+ 'max_model_len-8192-sw-None': (8, 128),
4011
+ },
4012
+ 'q_head-16_kv_head-8_head-256': {
4013
+ 'max_model_len-1024-sw-None': (8, 128),
4014
+ 'max_model_len-128-sw-None': (2, 8),
4015
+ 'max_model_len-2048-sw-None': (8, 64),
4016
+ 'max_model_len-256-sw-None': (4, 32),
4017
+ 'max_model_len-4096-sw-None': (8, 128),
4018
+ 'max_model_len-512-sw-None': (8, 64),
4019
+ 'max_model_len-64-sw-None': (1, 8),
4020
+ 'max_model_len-8192-sw-None': (8, 128),
4021
+ },
4022
+ 'q_head-2_kv_head-1_head-128': {
4023
+ 'max_model_len-1024-sw-None': (16, 256),
4024
+ 'max_model_len-128-sw-None': (1, 8),
4025
+ 'max_model_len-2048-sw-None': (32, 32),
4026
+ 'max_model_len-256-sw-None': (4, 16),
4027
+ 'max_model_len-4096-sw-None': (32, 64),
4028
+ 'max_model_len-512-sw-None': (8, 256),
4029
+ 'max_model_len-64-sw-None': (1, 256),
4030
+ 'max_model_len-8192-sw-None': (32, 128),
4031
+ },
4032
+ 'q_head-2_kv_head-1_head-256': {
4033
+ 'max_model_len-1024-sw-None': (8, 64),
4034
+ 'max_model_len-2048-sw-None': (16, 64),
4035
+ 'max_model_len-256-sw-None': (2, 32),
4036
+ 'max_model_len-4096-sw-None': (32, 128),
4037
+ 'max_model_len-512-sw-None': (8, 32),
4038
+ 'max_model_len-8192-sw-None': (32, 64),
4039
+ },
4040
+ 'q_head-32_kv_head-1_head-128': {
4041
+ 'max_model_len-1024-sw-None': (16, 16),
4042
+ 'max_model_len-128-sw-None': (2, 16),
4043
+ 'max_model_len-2048-sw-None': (16, 16),
4044
+ 'max_model_len-256-sw-None': (4, 8),
4045
+ 'max_model_len-4096-sw-None': (32, 16),
4046
+ 'max_model_len-512-sw-None': (8, 16),
4047
+ 'max_model_len-64-sw-None': (1, 32),
4048
+ 'max_model_len-8192-sw-None': (32, 32),
4049
+ },
4050
+ 'q_head-32_kv_head-1_head-256': {
4051
+ 'max_model_len-1024-sw-None': (8, 16),
4052
+ 'max_model_len-128-sw-None': (2, 16),
4053
+ 'max_model_len-2048-sw-None': (16, 8),
4054
+ 'max_model_len-256-sw-None': (4, 16),
4055
+ 'max_model_len-4096-sw-None': (32, 32),
4056
+ 'max_model_len-512-sw-None': (8, 16),
4057
+ 'max_model_len-64-sw-None': (1, 16),
4058
+ 'max_model_len-8192-sw-None': (32, 16),
4059
+ },
4060
+ 'q_head-32_kv_head-16_head-128': {
4061
+ 'max_model_len-1024-sw-None': (16, 64),
4062
+ 'max_model_len-128-sw-None': (2, 64),
4063
+ 'max_model_len-2048-sw-None': (16, 64),
4064
+ 'max_model_len-256-sw-None': (2, 32),
4065
+ 'max_model_len-4096-sw-None': (16, 64),
4066
+ 'max_model_len-512-sw-None': (8, 32),
4067
+ 'max_model_len-64-sw-None': (1, 8),
4068
+ 'max_model_len-8192-sw-None': (16, 64),
4069
+ },
4070
+ 'q_head-32_kv_head-16_head-256': {
4071
+ 'max_model_len-1024-sw-None': (8, 32),
4072
+ 'max_model_len-128-sw-None': (2, 8),
4073
+ 'max_model_len-2048-sw-None': (8, 32),
4074
+ 'max_model_len-256-sw-None': (4, 8),
4075
+ 'max_model_len-4096-sw-None': (8, 32),
4076
+ 'max_model_len-512-sw-None': (8, 32),
4077
+ 'max_model_len-64-sw-None': (1, 16),
4078
+ 'max_model_len-8192-sw-None': (4, 32),
4079
+ },
4080
+ 'q_head-32_kv_head-2_head-128': {
4081
+ 'max_model_len-1024-sw-None': (16, 16),
4082
+ 'max_model_len-128-sw-None': (2, 32),
4083
+ 'max_model_len-2048-sw-None': (16, 16),
4084
+ 'max_model_len-256-sw-None': (4, 8),
4085
+ 'max_model_len-4096-sw-None': (32, 64),
4086
+ 'max_model_len-512-sw-None': (8, 32),
4087
+ 'max_model_len-64-sw-None': (1, 8),
4088
+ 'max_model_len-8192-sw-None': (32, 64),
4089
+ },
4090
+ 'q_head-32_kv_head-2_head-256': {
4091
+ 'max_model_len-1024-sw-None': (16, 32),
4092
+ 'max_model_len-128-sw-None': (2, 8),
4093
+ 'max_model_len-2048-sw-None': (32, 32),
4094
+ 'max_model_len-256-sw-None': (4, 8),
4095
+ 'max_model_len-4096-sw-None': (16, 32),
4096
+ 'max_model_len-512-sw-None': (8, 32),
4097
+ 'max_model_len-64-sw-None': (1, 8),
4098
+ 'max_model_len-8192-sw-None': (32, 32),
4099
+ },
4100
+ 'q_head-32_kv_head-4_head-128': {
4101
+ 'max_model_len-1024-sw-None': (8, 32),
4102
+ 'max_model_len-128-sw-None': (1, 64),
4103
+ 'max_model_len-2048-sw-None': (32, 16),
4104
+ 'max_model_len-256-sw-None': (4, 32),
4105
+ 'max_model_len-4096-sw-None': (16, 16),
4106
+ 'max_model_len-512-sw-None': (8, 16),
4107
+ 'max_model_len-64-sw-None': (1, 8),
4108
+ 'max_model_len-8192-sw-None': (16, 32),
4109
+ },
4110
+ 'q_head-32_kv_head-4_head-256': {
4111
+ 'max_model_len-1024-sw-None': (8, 32),
4112
+ 'max_model_len-128-sw-None': (2, 16),
4113
+ 'max_model_len-2048-sw-None': (16, 32),
4114
+ 'max_model_len-256-sw-None': (4, 16),
4115
+ 'max_model_len-4096-sw-None': (16, 32),
4116
+ 'max_model_len-512-sw-None': (4, 16),
4117
+ 'max_model_len-64-sw-None': (1, 16),
4118
+ 'max_model_len-8192-sw-None': (16, 32),
4119
+ },
4120
+ 'q_head-32_kv_head-8_head-128': {
4121
+ 'max_model_len-1024-sw-None': (16, 32),
4122
+ 'max_model_len-128-sw-None': (2, 16),
4123
+ 'max_model_len-2048-sw-None': (16, 32),
4124
+ 'max_model_len-256-sw-None': (2, 16),
4125
+ 'max_model_len-4096-sw-None': (32, 32),
4126
+ 'max_model_len-512-sw-None': (8, 32),
4127
+ 'max_model_len-64-sw-None': (1, 16),
4128
+ 'max_model_len-8192-sw-None': (32, 32),
4129
+ },
4130
+ 'q_head-32_kv_head-8_head-256': {
4131
+ 'max_model_len-1024-sw-None': (8, 32),
4132
+ 'max_model_len-128-sw-None': (2, 16),
4133
+ 'max_model_len-2048-sw-None': (8, 64),
4134
+ 'max_model_len-256-sw-None': (4, 16),
4135
+ 'max_model_len-4096-sw-None': (16, 64),
4136
+ 'max_model_len-512-sw-None': (8, 32),
4137
+ 'max_model_len-64-sw-None': (1, 16),
4138
+ 'max_model_len-8192-sw-None': (8, 64),
4139
+ },
4140
+ 'q_head-4_kv_head-1_head-128': {
4141
+ 'max_model_len-1024-sw-None': (16, 32),
4142
+ 'max_model_len-128-sw-None': (2, 16),
4143
+ 'max_model_len-2048-sw-None': (32, 128),
4144
+ 'max_model_len-256-sw-None': (4, 8),
4145
+ 'max_model_len-4096-sw-None': (32, 16),
4146
+ 'max_model_len-512-sw-None': (4, 32),
4147
+ 'max_model_len-64-sw-None': (1, 32),
4148
+ 'max_model_len-8192-sw-None': (32, 128),
4149
+ },
4150
+ 'q_head-4_kv_head-1_head-256': {
4151
+ 'max_model_len-1024-sw-None': (16, 128),
4152
+ 'max_model_len-128-sw-None': (1, 32),
4153
+ 'max_model_len-2048-sw-None': (32, 32),
4154
+ 'max_model_len-256-sw-None': (4, 32),
4155
+ 'max_model_len-4096-sw-None': (32, 64),
4156
+ 'max_model_len-512-sw-None': (8, 64),
4157
+ 'max_model_len-64-sw-None': (1, 128),
4158
+ 'max_model_len-8192-sw-None': (32, 64),
4159
+ },
4160
+ 'q_head-4_kv_head-2_head-128': {
4161
+ 'max_model_len-1024-sw-None': (16, 256),
4162
+ 'max_model_len-128-sw-None': (2, 256),
4163
+ 'max_model_len-2048-sw-None': (32, 32),
4164
+ 'max_model_len-256-sw-None': (4, 8),
4165
+ 'max_model_len-4096-sw-None': (32, 64),
4166
+ 'max_model_len-512-sw-None': (8, 32),
4167
+ 'max_model_len-64-sw-None': (1, 32),
4168
+ 'max_model_len-8192-sw-None': (32, 64),
4169
+ },
4170
+ 'q_head-4_kv_head-2_head-256': {
4171
+ 'max_model_len-1024-sw-None': (8, 64),
4172
+ 'max_model_len-128-sw-None': (2, 32),
4173
+ 'max_model_len-2048-sw-None': (32, 128),
4174
+ 'max_model_len-256-sw-None': (4, 8),
4175
+ 'max_model_len-4096-sw-None': (32, 128),
4176
+ 'max_model_len-512-sw-None': (8, 16),
4177
+ 'max_model_len-64-sw-None': (1, 16),
4178
+ 'max_model_len-8192-sw-None': (16, 128),
4179
+ },
4180
+ 'q_head-64_kv_head-1_head-128': {
4181
+ 'max_model_len-1024-sw-None': (16, 16),
4182
+ 'max_model_len-128-sw-None': (2, 16),
4183
+ 'max_model_len-2048-sw-None': (32, 16),
4184
+ 'max_model_len-256-sw-None': (4, 8),
4185
+ 'max_model_len-4096-sw-None': (32, 16),
4186
+ 'max_model_len-512-sw-None': (8, 8),
4187
+ 'max_model_len-64-sw-None': (1, 16),
4188
+ },
4189
+ 'q_head-64_kv_head-1_head-256': {
4190
+ 'max_model_len-1024-sw-None': (16, 16),
4191
+ 'max_model_len-128-sw-None': (2, 16),
4192
+ 'max_model_len-2048-sw-None': (32, 8),
4193
+ 'max_model_len-256-sw-None': (2, 8),
4194
+ 'max_model_len-4096-sw-None': (32, 8),
4195
+ 'max_model_len-512-sw-None': (8, 8),
4196
+ 'max_model_len-64-sw-None': (1, 8),
4197
+ },
4198
+ 'q_head-64_kv_head-16_head-128': {
4199
+ 'max_model_len-1024-sw-None': (16, 32),
4200
+ 'max_model_len-128-sw-None': (2, 16),
4201
+ 'max_model_len-256-sw-None': (4, 16),
4202
+ 'max_model_len-4096-sw-None': (8, 32),
4203
+ 'max_model_len-512-sw-None': (8, 16),
4204
+ 'max_model_len-64-sw-None': (1, 16),
4205
+ 'max_model_len-8192-sw-None': (16, 32),
4206
+ },
4207
+ 'q_head-64_kv_head-16_head-256': {
4208
+ 'max_model_len-1024-sw-None': (4, 16),
4209
+ 'max_model_len-128-sw-None': (2, 16),
4210
+ 'max_model_len-2048-sw-None': (8, 16),
4211
+ 'max_model_len-256-sw-None': (4, 16),
4212
+ 'max_model_len-4096-sw-None': (8, 16),
4213
+ 'max_model_len-512-sw-None': (8, 16),
4214
+ 'max_model_len-64-sw-None': (1, 16),
4215
+ 'max_model_len-8192-sw-None': (8, 16),
4216
+ },
4217
+ 'q_head-64_kv_head-2_head-128': {
4218
+ 'max_model_len-1024-sw-None': (16, 16),
4219
+ 'max_model_len-128-sw-None': (2, 32),
4220
+ 'max_model_len-2048-sw-None': (32, 32),
4221
+ 'max_model_len-256-sw-None': (4, 16),
4222
+ 'max_model_len-4096-sw-None': (32, 16),
4223
+ 'max_model_len-512-sw-None': (8, 64),
4224
+ 'max_model_len-64-sw-None': (1, 32),
4225
+ },
4226
+ 'q_head-64_kv_head-2_head-256': {
4227
+ 'max_model_len-1024-sw-None': (16, 16),
4228
+ 'max_model_len-128-sw-None': (2, 16),
4229
+ 'max_model_len-2048-sw-None': (32, 16),
4230
+ 'max_model_len-256-sw-None': (4, 8),
4231
+ 'max_model_len-4096-sw-None': (16, 16),
4232
+ 'max_model_len-512-sw-None': (8, 8),
4233
+ 'max_model_len-64-sw-None': (1, 8),
4234
+ 'max_model_len-8192-sw-None': (32, 16),
4235
+ },
4236
+ 'q_head-64_kv_head-4_head-128': {
4237
+ 'max_model_len-1024-sw-None': (8, 16),
4238
+ 'max_model_len-128-sw-None': (1, 8),
4239
+ 'max_model_len-2048-sw-None': (16, 32),
4240
+ 'max_model_len-256-sw-None': (4, 8),
4241
+ 'max_model_len-4096-sw-None': (16, 16),
4242
+ 'max_model_len-512-sw-None': (8, 64),
4243
+ 'max_model_len-64-sw-None': (1, 8),
4244
+ 'max_model_len-8192-sw-None': (16, 32),
4245
+ },
4246
+ 'q_head-64_kv_head-4_head-256': {
4247
+ 'max_model_len-1024-sw-None': (16, 16),
4248
+ 'max_model_len-2048-sw-None': (16, 32),
4249
+ 'max_model_len-256-sw-None': (4, 8),
4250
+ 'max_model_len-4096-sw-None': (16, 16),
4251
+ 'max_model_len-64-sw-None': (1, 8),
4252
+ 'max_model_len-8192-sw-None': (16, 32),
4253
+ },
4254
+ 'q_head-64_kv_head-8_head-128': {
4255
+ 'max_model_len-1024-sw-None': (16, 64),
4256
+ 'max_model_len-128-sw-None': (2, 16),
4257
+ 'max_model_len-2048-sw-None': (16, 32),
4258
+ 'max_model_len-256-sw-None': (4, 16),
4259
+ 'max_model_len-4096-sw-None': (16, 64),
4260
+ 'max_model_len-64-sw-None': (1, 32),
4261
+ 'max_model_len-8192-sw-None': (16, 32),
4262
+ },
4263
+ 'q_head-64_kv_head-8_head-256': {
4264
+ 'max_model_len-1024-sw-None': (8, 32),
4265
+ 'max_model_len-128-sw-None': (2, 8),
4266
+ 'max_model_len-2048-sw-None': (16, 32),
4267
+ 'max_model_len-256-sw-None': (4, 16),
4268
+ 'max_model_len-4096-sw-None': (16, 32),
4269
+ 'max_model_len-512-sw-None': (8, 32),
4270
+ 'max_model_len-64-sw-None': (1, 8),
4271
+ 'max_model_len-8192-sw-None': (16, 32),
4272
+ },
4273
+ 'q_head-8_kv_head-1_head-128': {
4274
+ 'max_model_len-1024-sw-None': (16, 64),
4275
+ 'max_model_len-128-sw-None': (2, 64),
4276
+ 'max_model_len-2048-sw-None': (32, 32),
4277
+ 'max_model_len-256-sw-None': (4, 128),
4278
+ 'max_model_len-4096-sw-None': (32, 32),
4279
+ 'max_model_len-512-sw-None': (8, 8),
4280
+ 'max_model_len-64-sw-None': (1, 128),
4281
+ 'max_model_len-8192-sw-None': (32, 32),
4282
+ },
4283
+ 'q_head-8_kv_head-1_head-256': {
4284
+ 'max_model_len-1024-sw-None': (16, 64),
4285
+ 'max_model_len-128-sw-None': (2, 32),
4286
+ 'max_model_len-2048-sw-None': (32, 32),
4287
+ 'max_model_len-256-sw-None': (4, 16),
4288
+ 'max_model_len-4096-sw-None': (32, 64),
4289
+ 'max_model_len-512-sw-None': (8, 8),
4290
+ 'max_model_len-64-sw-None': (1, 32),
4291
+ 'max_model_len-8192-sw-None': (32, 32),
4292
+ },
4293
+ 'q_head-8_kv_head-2_head-128': {
4294
+ 'max_model_len-1024-sw-None': (16, 64),
4295
+ 'max_model_len-128-sw-None': (2, 64),
4296
+ 'max_model_len-2048-sw-None': (32, 32),
4297
+ 'max_model_len-256-sw-None': (4, 128),
4298
+ 'max_model_len-4096-sw-None': (32, 32),
4299
+ 'max_model_len-512-sw-None': (8, 128),
4300
+ 'max_model_len-64-sw-None': (1, 16),
4301
+ 'max_model_len-8192-sw-None': (32, 32),
4302
+ },
4303
+ 'q_head-8_kv_head-2_head-256': {
4304
+ 'max_model_len-1024-sw-None': (16, 128),
4305
+ 'max_model_len-128-sw-None': (2, 64),
4306
+ 'max_model_len-2048-sw-None': (32, 32),
4307
+ 'max_model_len-256-sw-None': (4, 8),
4308
+ 'max_model_len-4096-sw-None': (16, 32),
4309
+ 'max_model_len-512-sw-None': (8, 64),
4310
+ 'max_model_len-64-sw-None': (1, 16),
4311
+ 'max_model_len-8192-sw-None': (32, 128),
4312
+ },
4313
+ 'q_head-8_kv_head-4_head-128': {
4314
+ 'max_model_len-1024-sw-None': (16, 32),
4315
+ 'max_model_len-128-sw-None': (2, 32),
4316
+ 'max_model_len-2048-sw-None': (32, 64),
4317
+ 'max_model_len-256-sw-None': (4, 32),
4318
+ 'max_model_len-4096-sw-None': (16, 64),
4319
+ 'max_model_len-512-sw-None': (8, 64),
4320
+ 'max_model_len-64-sw-None': (1, 16),
4321
+ 'max_model_len-8192-sw-None': (16, 64),
4322
+ },
4323
+ 'q_head-8_kv_head-4_head-256': {
4324
+ 'max_model_len-1024-sw-None': (8, 32),
4325
+ 'max_model_len-128-sw-None': (2, 32),
4326
+ 'max_model_len-2048-sw-None': (8, 128),
4327
+ 'max_model_len-256-sw-None': (4, 64),
4328
+ 'max_model_len-4096-sw-None': (8, 128),
4329
+ 'max_model_len-512-sw-None': (8, 128),
4330
+ 'max_model_len-64-sw-None': (1, 64),
4331
+ 'max_model_len-8192-sw-None': (8, 128),
4332
+ },
4333
+ }
4334
+ },
4335
+ },
4336
+ }
4337
+
4338
+
4339
+ def get_tuned_block_sizes(
4340
+ q_dtype,
4341
+ kv_dtype,
4342
+ actual_num_q_heads,
4343
+ actual_num_kv_heads,
4344
+ head_dim,
4345
+ page_size,
4346
+ max_num_tokens,
4347
+ pages_per_seq,
4348
+ sliding_window=None,
4349
+ ) -> tuple[int, int]:
4350
+ """Search tuned values for (num_kv_pages_per_blk, num_queries_per_blk)."""
4351
+
4352
+ keys = get_lookup_keys(
4353
+ page_size,
4354
+ q_dtype,
4355
+ kv_dtype,
4356
+ actual_num_q_heads,
4357
+ actual_num_kv_heads,
4358
+ head_dim,
4359
+ page_size * pages_per_seq,
4360
+ sliding_window,
4361
+ )
4362
+ device, page_size, dtypes, head_dims, extra = keys
4363
+
4364
+ try:
4365
+ bkv_p, bq = TUNED_BLOCK_SIZES[device][page_size][dtypes][head_dims][
4366
+ extra]
4367
+ except KeyError:
4368
+ logger.warning_once(
4369
+ 'Couldn`t find tuned sizes for the RPA v3 kernel with %s', keys)
4370
+ # When not available use a sensible default based on TPU version
4371
+ # Set default block sizes for each tpu_version.
4372
+ tpu_version = get_tpu_version()
4373
+ if tpu_version < 4:
4374
+ raise NotImplementedError('TPU version must be 4 or higher.')
4375
+ match tpu_version:
4376
+ case 4:
4377
+ # TPUv4 has much smaller VMEM size so we pick fixed block sizes.
4378
+ bkv_p, bq = (512 // page_size, 32)
4379
+ case 7:
4380
+ bkv_p, bq = (4096 // page_size, 32)
4381
+ case _:
4382
+ bkv_p, bq = (2048 // page_size, 32)
4383
+
4384
+ bkv_p, bq = (min(pages_per_seq, bkv_p), min(max_num_tokens, bq))
4385
+
4386
+ logger.info_once('RPA v3 kernel tuned block sizes for %s: bkv_p=%s, bq=%s',
4387
+ keys, bkv_p, bq)
4388
+ return bkv_p, bq
4389
+
4390
+
4391
def get_lookup_keys(
    page_size,
    q_dtype,
    kv_dtype,
    num_q_heads,
    num_kv_heads,
    head_dim,
    max_model_len,
    sliding_window,
):
    """Build the nested-dict keys used to index the tuned block size table.

    The raw arguments are first normalized by `get_simplified_raw_key`; the
    normalized values are then formatted into the five keys, in lookup order:
    device name, page size, dtype pair, head configuration, and the
    max-model-len / sliding-window string.

    Returns:
        A 5-tuple `(device, page_size, dtypes, head_dims, extra)`.
    """
    simplified = get_simplified_raw_key(
        page_size,
        q_dtype,
        kv_dtype,
        num_q_heads,
        num_kv_heads,
        head_dim,
        max_model_len,
        sliding_window,
    )
    (
        norm_page_size,
        q_name,
        kv_name,
        norm_q_heads,
        norm_kv_heads,
        norm_head_dim,
        norm_model_len,
        window,
    ) = simplified

    device_key = get_device_name()
    page_key = next_power_of_2(norm_page_size)
    dtype_key = f'q_{q_name}_kv_{kv_name}'
    heads_key = (
        f'q_head-{norm_q_heads}_kv_head-{norm_kv_heads}_head-{norm_head_dim}')
    extra_key = (
        f'max_model_len-{next_power_of_2(norm_model_len)}-sw-{window}')

    return (device_key, page_key, dtype_key, heads_key, extra_key)
4429
+
4430
+
4431
def get_simplified_raw_key(
    page_size,
    q_dtype,
    kv_dtype,
    actual_num_q_heads,
    actual_num_kv_heads,
    head_dim,
    max_model_len,
    sliding_window,
):
    """Normalize raw kernel parameters into a simplified lookup tuple.

    The q-heads-per-kv-head count and the doubled kv-head count are passed
    through `align_to` with the packing reported by `get_dtype_packing` for
    the respective dtype. Page size, head counts and max model length then go
    through `next_power_of_2`, and `head_dim` through `align_to(..., 128)`.
    `sliding_window` is returned untouched.

    Returns:
        An 8-tuple `(page_size, q_dtype_name, kv_dtype_name, num_q_heads,
        num_kv_heads, head_dim, max_model_len, sliding_window)`.
    """
    assert actual_num_q_heads % actual_num_kv_heads == 0
    q_heads_per_kv = actual_num_q_heads // actual_num_kv_heads

    packing_q = get_dtype_packing(q_dtype)
    packing_kv = get_dtype_packing(kv_dtype)
    # K and V heads are counted together here, hence the factor of two.
    padded_kv_heads_x2 = align_to(actual_num_kv_heads * 2, packing_kv)
    padded_q_heads_per_kv = align_to(q_heads_per_kv, packing_q)
    assert padded_kv_heads_x2 % 2 == 0

    page_key = next_power_of_2(page_size)
    q_name = jnp.dtype(q_dtype).name
    kv_name = jnp.dtype(kv_dtype).name
    q_heads_key = next_power_of_2(padded_q_heads_per_kv * actual_num_kv_heads)
    kv_heads_key = next_power_of_2(padded_kv_heads_x2) // 2
    head_dim_key = align_to(head_dim, 128)
    model_len_key = next_power_of_2(max_model_len)

    return (
        page_key,
        q_name,
        kv_name,
        q_heads_key,
        kv_heads_key,
        head_dim_key,
        model_len_key,
        sliding_window,
    )