vec-inf 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/README.md +18 -4
- vec_inf/cli/_cli.py +212 -30
- vec_inf/cli/_helper.py +95 -14
- vec_inf/client/_client_vars.py +19 -152
- vec_inf/client/_helper.py +386 -53
- vec_inf/client/_slurm_script_generator.py +210 -43
- vec_inf/client/_slurm_templates.py +248 -0
- vec_inf/client/_slurm_vars.py +82 -0
- vec_inf/client/_utils.py +190 -71
- vec_inf/client/api.py +96 -25
- vec_inf/client/config.py +46 -15
- vec_inf/client/models.py +51 -2
- vec_inf/config/README.md +4 -243
- vec_inf/config/environment.yaml +31 -0
- vec_inf/config/models.yaml +102 -281
- {vec_inf-0.6.0.dist-info → vec_inf-0.7.0.dist-info}/METADATA +25 -67
- vec_inf-0.7.0.dist-info/RECORD +27 -0
- vec_inf/client/slurm_vars.py +0 -49
- vec_inf-0.6.0.dist-info/RECORD +0 -25
- {vec_inf-0.6.0.dist-info → vec_inf-0.7.0.dist-info}/WHEEL +0 -0
- {vec_inf-0.6.0.dist-info → vec_inf-0.7.0.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.6.0.dist-info → vec_inf-0.7.0.dist-info}/licenses/LICENSE +0 -0
vec_inf/config/models.yaml
CHANGED
|
@@ -1,20 +1,4 @@
|
|
|
1
1
|
models:
|
|
2
|
-
c4ai-command-r-plus:
|
|
3
|
-
model_family: c4ai-command-r
|
|
4
|
-
model_variant: plus
|
|
5
|
-
model_type: LLM
|
|
6
|
-
gpus_per_node: 4
|
|
7
|
-
num_nodes: 2
|
|
8
|
-
vocab_size: 256000
|
|
9
|
-
qos: m2
|
|
10
|
-
time: 08:00:00
|
|
11
|
-
partition: a40
|
|
12
|
-
vllm_args:
|
|
13
|
-
--pipeline-parallel-size: 2
|
|
14
|
-
--tensor-parallel-size: 4
|
|
15
|
-
--max-model-len: 8192
|
|
16
|
-
--max-num-seqs: 256
|
|
17
|
-
--compilation-config: 3
|
|
18
2
|
c4ai-command-r-plus-08-2024:
|
|
19
3
|
model_family: c4ai-command-r
|
|
20
4
|
model_variant: plus-08-2024
|
|
@@ -22,15 +6,13 @@ models:
|
|
|
22
6
|
gpus_per_node: 4
|
|
23
7
|
num_nodes: 2
|
|
24
8
|
vocab_size: 256000
|
|
25
|
-
qos: m2
|
|
26
9
|
time: 08:00:00
|
|
27
|
-
|
|
10
|
+
resource_type: l40s
|
|
28
11
|
vllm_args:
|
|
29
12
|
--pipeline-parallel-size: 2
|
|
30
13
|
--tensor-parallel-size: 4
|
|
31
14
|
--max-model-len: 65536
|
|
32
15
|
--max-num-seqs: 256
|
|
33
|
-
--compilation-config: 3
|
|
34
16
|
c4ai-command-r-08-2024:
|
|
35
17
|
model_family: c4ai-command-r
|
|
36
18
|
model_variant: 08-2024
|
|
@@ -38,14 +20,12 @@ models:
|
|
|
38
20
|
gpus_per_node: 2
|
|
39
21
|
num_nodes: 1
|
|
40
22
|
vocab_size: 256000
|
|
41
|
-
qos: m2
|
|
42
23
|
time: 08:00:00
|
|
43
|
-
|
|
24
|
+
resource_type: l40s
|
|
44
25
|
vllm_args:
|
|
45
26
|
--tensor-parallel-size: 2
|
|
46
27
|
--max-model-len: 32768
|
|
47
28
|
--max-num-seqs: 256
|
|
48
|
-
--compilation-config: 3
|
|
49
29
|
CodeLlama-7b-hf:
|
|
50
30
|
model_family: CodeLlama
|
|
51
31
|
model_variant: 7b-hf
|
|
@@ -53,13 +33,11 @@ models:
|
|
|
53
33
|
gpus_per_node: 1
|
|
54
34
|
num_nodes: 1
|
|
55
35
|
vocab_size: 32000
|
|
56
|
-
qos: m2
|
|
57
36
|
time: 08:00:00
|
|
58
|
-
|
|
37
|
+
resource_type: l40s
|
|
59
38
|
vllm_args:
|
|
60
39
|
--max-model-len: 16384
|
|
61
40
|
--max-num-seqs: 256
|
|
62
|
-
--compilation-config: 3
|
|
63
41
|
CodeLlama-7b-Instruct-hf:
|
|
64
42
|
model_family: CodeLlama
|
|
65
43
|
model_variant: 7b-Instruct-hf
|
|
@@ -67,13 +45,11 @@ models:
|
|
|
67
45
|
gpus_per_node: 1
|
|
68
46
|
num_nodes: 1
|
|
69
47
|
vocab_size: 32000
|
|
70
|
-
qos: m2
|
|
71
48
|
time: 08:00:00
|
|
72
|
-
|
|
49
|
+
resource_type: l40s
|
|
73
50
|
vllm_args:
|
|
74
51
|
--max-model-len: 16384
|
|
75
52
|
--max-num-seqs: 256
|
|
76
|
-
--compilation-config: 3
|
|
77
53
|
CodeLlama-13b-hf:
|
|
78
54
|
model_family: CodeLlama
|
|
79
55
|
model_variant: 13b-hf
|
|
@@ -81,13 +57,11 @@ models:
|
|
|
81
57
|
gpus_per_node: 1
|
|
82
58
|
num_nodes: 1
|
|
83
59
|
vocab_size: 32000
|
|
84
|
-
qos: m2
|
|
85
60
|
time: 08:00:00
|
|
86
|
-
|
|
61
|
+
resource_type: l40s
|
|
87
62
|
vllm_args:
|
|
88
63
|
--max-model-len: 16384
|
|
89
64
|
--max-num-seqs: 256
|
|
90
|
-
--compilation-config: 3
|
|
91
65
|
CodeLlama-13b-Instruct-hf:
|
|
92
66
|
model_family: CodeLlama
|
|
93
67
|
model_variant: 13b-Instruct-hf
|
|
@@ -95,13 +69,11 @@ models:
|
|
|
95
69
|
gpus_per_node: 1
|
|
96
70
|
num_nodes: 1
|
|
97
71
|
vocab_size: 32000
|
|
98
|
-
qos: m2
|
|
99
72
|
time: 08:00:00
|
|
100
|
-
|
|
73
|
+
resource_type: l40s
|
|
101
74
|
vllm_args:
|
|
102
75
|
--max-model-len: 16384
|
|
103
76
|
--max-num-seqs: 256
|
|
104
|
-
--compilation-config: 3
|
|
105
77
|
CodeLlama-34b-hf:
|
|
106
78
|
model_family: CodeLlama
|
|
107
79
|
model_variant: 34b-hf
|
|
@@ -109,14 +81,12 @@ models:
|
|
|
109
81
|
gpus_per_node: 2
|
|
110
82
|
num_nodes: 1
|
|
111
83
|
vocab_size: 32000
|
|
112
|
-
qos: m2
|
|
113
84
|
time: 08:00:00
|
|
114
|
-
|
|
85
|
+
resource_type: l40s
|
|
115
86
|
vllm_args:
|
|
116
87
|
--tensor-parallel-size: 2
|
|
117
88
|
--max-model-len: 16384
|
|
118
89
|
--max-num-seqs: 256
|
|
119
|
-
--compilation-config: 3
|
|
120
90
|
CodeLlama-34b-Instruct-hf:
|
|
121
91
|
model_family: CodeLlama
|
|
122
92
|
model_variant: 34b-Instruct-hf
|
|
@@ -124,14 +94,12 @@ models:
|
|
|
124
94
|
gpus_per_node: 2
|
|
125
95
|
num_nodes: 1
|
|
126
96
|
vocab_size: 32000
|
|
127
|
-
qos: m2
|
|
128
97
|
time: 08:00:00
|
|
129
|
-
|
|
98
|
+
resource_type: l40s
|
|
130
99
|
vllm_args:
|
|
131
100
|
--tensor-parallel-size: 2
|
|
132
101
|
--max-model-len: 16384
|
|
133
102
|
--max-num-seqs: 256
|
|
134
|
-
--compilation-config: 3
|
|
135
103
|
CodeLlama-70b-hf:
|
|
136
104
|
model_family: CodeLlama
|
|
137
105
|
model_variant: 70b-hf
|
|
@@ -139,14 +107,12 @@ models:
|
|
|
139
107
|
gpus_per_node: 4
|
|
140
108
|
num_nodes: 1
|
|
141
109
|
vocab_size: 32016
|
|
142
|
-
qos: m2
|
|
143
110
|
time: 08:00:00
|
|
144
|
-
|
|
111
|
+
resource_type: l40s
|
|
145
112
|
vllm_args:
|
|
146
113
|
--tensor-parallel-size: 4
|
|
147
114
|
--max-model-len: 4096
|
|
148
115
|
--max-num-seqs: 256
|
|
149
|
-
--compilation-config: 3
|
|
150
116
|
CodeLlama-70b-Instruct-hf:
|
|
151
117
|
model_family: CodeLlama
|
|
152
118
|
model_variant: 70b-Instruct-hf
|
|
@@ -154,14 +120,12 @@ models:
|
|
|
154
120
|
gpus_per_node: 4
|
|
155
121
|
num_nodes: 1
|
|
156
122
|
vocab_size: 32016
|
|
157
|
-
qos: m2
|
|
158
123
|
time: 08:00:00
|
|
159
|
-
|
|
124
|
+
resource_type: l40s
|
|
160
125
|
vllm_args:
|
|
161
126
|
--tensor-parallel-size: 4
|
|
162
127
|
--max-model-len: 4096
|
|
163
128
|
--max-num-seqs: 256
|
|
164
|
-
--compilation-config: 3
|
|
165
129
|
gemma-2-9b:
|
|
166
130
|
model_family: gemma-2
|
|
167
131
|
model_variant: 9b
|
|
@@ -169,13 +133,11 @@ models:
|
|
|
169
133
|
gpus_per_node: 1
|
|
170
134
|
num_nodes: 1
|
|
171
135
|
vocab_size: 256000
|
|
172
|
-
qos: m2
|
|
173
136
|
time: 08:00:00
|
|
174
|
-
|
|
137
|
+
resource_type: l40s
|
|
175
138
|
vllm_args:
|
|
176
139
|
--max-model-len: 4096
|
|
177
140
|
--max-num-seqs: 256
|
|
178
|
-
--compilation-config: 3
|
|
179
141
|
gemma-2-9b-it:
|
|
180
142
|
model_family: gemma-2
|
|
181
143
|
model_variant: 9b-it
|
|
@@ -183,13 +145,11 @@ models:
|
|
|
183
145
|
gpus_per_node: 1
|
|
184
146
|
num_nodes: 1
|
|
185
147
|
vocab_size: 256000
|
|
186
|
-
qos: m2
|
|
187
148
|
time: 08:00:00
|
|
188
|
-
|
|
149
|
+
resource_type: l40s
|
|
189
150
|
vllm_args:
|
|
190
151
|
--max-model-len: 4096
|
|
191
152
|
--max-num-seqs: 256
|
|
192
|
-
--compilation-config: 3
|
|
193
153
|
gemma-2-27b:
|
|
194
154
|
model_family: gemma-2
|
|
195
155
|
model_variant: 27b
|
|
@@ -197,14 +157,12 @@ models:
|
|
|
197
157
|
gpus_per_node: 2
|
|
198
158
|
num_nodes: 1
|
|
199
159
|
vocab_size: 256000
|
|
200
|
-
qos: m2
|
|
201
160
|
time: 08:00:00
|
|
202
|
-
|
|
161
|
+
resource_type: l40s
|
|
203
162
|
vllm_args:
|
|
204
163
|
--tensor-parallel-size: 2
|
|
205
164
|
--max-model-len: 4096
|
|
206
165
|
--max-num-seqs: 256
|
|
207
|
-
--compilation-config: 3
|
|
208
166
|
gemma-2-27b-it:
|
|
209
167
|
model_family: gemma-2
|
|
210
168
|
model_variant: 27b-it
|
|
@@ -212,14 +170,12 @@ models:
|
|
|
212
170
|
gpus_per_node: 2
|
|
213
171
|
num_nodes: 1
|
|
214
172
|
vocab_size: 256000
|
|
215
|
-
qos: m2
|
|
216
173
|
time: 08:00:00
|
|
217
|
-
|
|
174
|
+
resource_type: l40s
|
|
218
175
|
vllm_args:
|
|
219
176
|
--tensor-parallel-size: 2
|
|
220
177
|
--max-model-len: 4096
|
|
221
178
|
--max-num-seqs: 256
|
|
222
|
-
--compilation-config: 3
|
|
223
179
|
Llama-2-7b-hf:
|
|
224
180
|
model_family: Llama-2
|
|
225
181
|
model_variant: 7b-hf
|
|
@@ -227,13 +183,11 @@ models:
|
|
|
227
183
|
gpus_per_node: 1
|
|
228
184
|
num_nodes: 1
|
|
229
185
|
vocab_size: 32000
|
|
230
|
-
qos: m2
|
|
231
186
|
time: 08:00:00
|
|
232
|
-
|
|
187
|
+
resource_type: l40s
|
|
233
188
|
vllm_args:
|
|
234
189
|
--max-model-len: 4096
|
|
235
190
|
--max-num-seqs: 256
|
|
236
|
-
--compilation-config: 3
|
|
237
191
|
Llama-2-7b-chat-hf:
|
|
238
192
|
model_family: Llama-2
|
|
239
193
|
model_variant: 7b-chat-hf
|
|
@@ -241,13 +195,11 @@ models:
|
|
|
241
195
|
gpus_per_node: 1
|
|
242
196
|
num_nodes: 1
|
|
243
197
|
vocab_size: 32000
|
|
244
|
-
qos: m2
|
|
245
198
|
time: 08:00:00
|
|
246
|
-
|
|
199
|
+
resource_type: l40s
|
|
247
200
|
vllm_args:
|
|
248
201
|
--max-model-len: 4096
|
|
249
202
|
--max-num-seqs: 256
|
|
250
|
-
--compilation-config: 3
|
|
251
203
|
Llama-2-13b-hf:
|
|
252
204
|
model_family: Llama-2
|
|
253
205
|
model_variant: 13b-hf
|
|
@@ -255,13 +207,11 @@ models:
|
|
|
255
207
|
gpus_per_node: 1
|
|
256
208
|
num_nodes: 1
|
|
257
209
|
vocab_size: 32000
|
|
258
|
-
qos: m2
|
|
259
210
|
time: 08:00:00
|
|
260
|
-
|
|
211
|
+
resource_type: l40s
|
|
261
212
|
vllm_args:
|
|
262
213
|
--max-model-len: 4096
|
|
263
214
|
--max-num-seqs: 256
|
|
264
|
-
--compilation-config: 3
|
|
265
215
|
Llama-2-13b-chat-hf:
|
|
266
216
|
model_family: Llama-2
|
|
267
217
|
model_variant: 13b-chat-hf
|
|
@@ -269,13 +219,11 @@ models:
|
|
|
269
219
|
gpus_per_node: 1
|
|
270
220
|
num_nodes: 1
|
|
271
221
|
vocab_size: 32000
|
|
272
|
-
qos: m2
|
|
273
222
|
time: 08:00:00
|
|
274
|
-
|
|
223
|
+
resource_type: l40s
|
|
275
224
|
vllm_args:
|
|
276
225
|
--max-model-len: 4096
|
|
277
226
|
--max-num-seqs: 256
|
|
278
|
-
--compilation-config: 3
|
|
279
227
|
Llama-2-70b-hf:
|
|
280
228
|
model_family: Llama-2
|
|
281
229
|
model_variant: 70b-hf
|
|
@@ -283,14 +231,12 @@ models:
|
|
|
283
231
|
gpus_per_node: 4
|
|
284
232
|
num_nodes: 1
|
|
285
233
|
vocab_size: 32000
|
|
286
|
-
qos: m2
|
|
287
234
|
time: 08:00:00
|
|
288
|
-
|
|
235
|
+
resource_type: l40s
|
|
289
236
|
vllm_args:
|
|
290
237
|
--tensor-parallel-size: 4
|
|
291
238
|
--max-model-len: 4096
|
|
292
239
|
--max-num-seqs: 256
|
|
293
|
-
--compilation-config: 3
|
|
294
240
|
Llama-2-70b-chat-hf:
|
|
295
241
|
model_family: Llama-2
|
|
296
242
|
model_variant: 70b-chat-hf
|
|
@@ -298,14 +244,12 @@ models:
|
|
|
298
244
|
gpus_per_node: 4
|
|
299
245
|
num_nodes: 1
|
|
300
246
|
vocab_size: 32000
|
|
301
|
-
qos: m2
|
|
302
247
|
time: 08:00:00
|
|
303
|
-
|
|
248
|
+
resource_type: l40s
|
|
304
249
|
vllm_args:
|
|
305
250
|
--tensor-parallel-size: 4
|
|
306
251
|
--max-model-len: 4096
|
|
307
252
|
--max-num-seqs: 256
|
|
308
|
-
--compilation-config: 3
|
|
309
253
|
llava-1.5-7b-hf:
|
|
310
254
|
model_family: llava-1.5
|
|
311
255
|
model_variant: 7b-hf
|
|
@@ -313,13 +257,11 @@ models:
|
|
|
313
257
|
gpus_per_node: 1
|
|
314
258
|
num_nodes: 1
|
|
315
259
|
vocab_size: 32000
|
|
316
|
-
qos: m2
|
|
317
260
|
time: 08:00:00
|
|
318
|
-
|
|
261
|
+
resource_type: l40s
|
|
319
262
|
vllm_args:
|
|
320
263
|
--max-model-len: 4096
|
|
321
264
|
--max-num-seqs: 256
|
|
322
|
-
--compilation-config: 3
|
|
323
265
|
llava-1.5-13b-hf:
|
|
324
266
|
model_family: llava-1.5
|
|
325
267
|
model_variant: 13b-hf
|
|
@@ -327,13 +269,11 @@ models:
|
|
|
327
269
|
gpus_per_node: 1
|
|
328
270
|
num_nodes: 1
|
|
329
271
|
vocab_size: 32000
|
|
330
|
-
qos: m2
|
|
331
272
|
time: 08:00:00
|
|
332
|
-
|
|
273
|
+
resource_type: l40s
|
|
333
274
|
vllm_args:
|
|
334
275
|
--max-model-len: 4096
|
|
335
276
|
--max-num-seqs: 256
|
|
336
|
-
--compilation-config: 3
|
|
337
277
|
llava-v1.6-mistral-7b-hf:
|
|
338
278
|
model_family: llava-v1.6
|
|
339
279
|
model_variant: mistral-7b-hf
|
|
@@ -341,13 +281,11 @@ models:
|
|
|
341
281
|
gpus_per_node: 1
|
|
342
282
|
num_nodes: 1
|
|
343
283
|
vocab_size: 32064
|
|
344
|
-
qos: m2
|
|
345
284
|
time: 08:00:00
|
|
346
|
-
|
|
285
|
+
resource_type: l40s
|
|
347
286
|
vllm_args:
|
|
348
287
|
--max-model-len: 32768
|
|
349
288
|
--max-num-seqs: 256
|
|
350
|
-
--compilation-config: 3
|
|
351
289
|
llava-v1.6-34b-hf:
|
|
352
290
|
model_family: llava-v1.6
|
|
353
291
|
model_variant: 34b-hf
|
|
@@ -355,14 +293,12 @@ models:
|
|
|
355
293
|
gpus_per_node: 2
|
|
356
294
|
num_nodes: 1
|
|
357
295
|
vocab_size: 64064
|
|
358
|
-
qos: m2
|
|
359
296
|
time: 08:00:00
|
|
360
|
-
|
|
297
|
+
resource_type: l40s
|
|
361
298
|
vllm_args:
|
|
362
299
|
--tensor-parallel-size: 2
|
|
363
300
|
--max-model-len: 4096
|
|
364
301
|
--max-num-seqs: 256
|
|
365
|
-
--compilation-config: 3
|
|
366
302
|
Meta-Llama-3-8B:
|
|
367
303
|
model_family: Meta-Llama-3
|
|
368
304
|
model_variant: 8B
|
|
@@ -370,13 +306,11 @@ models:
|
|
|
370
306
|
gpus_per_node: 1
|
|
371
307
|
num_nodes: 1
|
|
372
308
|
vocab_size: 128256
|
|
373
|
-
qos: m2
|
|
374
309
|
time: 08:00:00
|
|
375
|
-
|
|
310
|
+
resource_type: l40s
|
|
376
311
|
vllm_args:
|
|
377
312
|
--max-model-len: 8192
|
|
378
313
|
--max-num-seqs: 256
|
|
379
|
-
--compilation-config: 3
|
|
380
314
|
Meta-Llama-3-8B-Instruct:
|
|
381
315
|
model_family: Meta-Llama-3
|
|
382
316
|
model_variant: 8B-Instruct
|
|
@@ -384,13 +318,11 @@ models:
|
|
|
384
318
|
gpus_per_node: 1
|
|
385
319
|
num_nodes: 1
|
|
386
320
|
vocab_size: 128256
|
|
387
|
-
qos: m2
|
|
388
321
|
time: 08:00:00
|
|
389
|
-
|
|
322
|
+
resource_type: l40s
|
|
390
323
|
vllm_args:
|
|
391
324
|
--max-model-len: 8192
|
|
392
325
|
--max-num-seqs: 256
|
|
393
|
-
--compilation-config: 3
|
|
394
326
|
Meta-Llama-3-70B:
|
|
395
327
|
model_family: Meta-Llama-3
|
|
396
328
|
model_variant: 70B
|
|
@@ -398,14 +330,12 @@ models:
|
|
|
398
330
|
gpus_per_node: 4
|
|
399
331
|
num_nodes: 1
|
|
400
332
|
vocab_size: 128256
|
|
401
|
-
qos: m2
|
|
402
333
|
time: 08:00:00
|
|
403
|
-
|
|
334
|
+
resource_type: l40s
|
|
404
335
|
vllm_args:
|
|
405
336
|
--tensor-parallel-size: 4
|
|
406
337
|
--max-model-len: 8192
|
|
407
338
|
--max-num-seqs: 256
|
|
408
|
-
--compilation-config: 3
|
|
409
339
|
Meta-Llama-3-70B-Instruct:
|
|
410
340
|
model_family: Meta-Llama-3
|
|
411
341
|
model_variant: 70B-Instruct
|
|
@@ -413,14 +343,12 @@ models:
|
|
|
413
343
|
gpus_per_node: 4
|
|
414
344
|
num_nodes: 1
|
|
415
345
|
vocab_size: 128256
|
|
416
|
-
qos: m2
|
|
417
346
|
time: 08:00:00
|
|
418
|
-
|
|
347
|
+
resource_type: l40s
|
|
419
348
|
vllm_args:
|
|
420
349
|
--tensor-parallel-size: 4
|
|
421
350
|
--max-model-len: 8192
|
|
422
351
|
--max-num-seqs: 256
|
|
423
|
-
--compilation-config: 3
|
|
424
352
|
Meta-Llama-3.1-8B:
|
|
425
353
|
model_family: Meta-Llama-3.1
|
|
426
354
|
model_variant: 8B
|
|
@@ -428,13 +356,11 @@ models:
|
|
|
428
356
|
gpus_per_node: 1
|
|
429
357
|
num_nodes: 1
|
|
430
358
|
vocab_size: 128256
|
|
431
|
-
qos: m2
|
|
432
359
|
time: 08:00:00
|
|
433
|
-
|
|
360
|
+
resource_type: l40s
|
|
434
361
|
vllm_args:
|
|
435
362
|
--max-model-len: 131072
|
|
436
363
|
--max-num-seqs: 256
|
|
437
|
-
--compilation-config: 3
|
|
438
364
|
Meta-Llama-3.1-8B-Instruct:
|
|
439
365
|
model_family: Meta-Llama-3.1
|
|
440
366
|
model_variant: 8B-Instruct
|
|
@@ -442,13 +368,11 @@ models:
|
|
|
442
368
|
gpus_per_node: 1
|
|
443
369
|
num_nodes: 1
|
|
444
370
|
vocab_size: 128256
|
|
445
|
-
qos: m2
|
|
446
371
|
time: 08:00:00
|
|
447
|
-
|
|
372
|
+
resource_type: l40s
|
|
448
373
|
vllm_args:
|
|
449
374
|
--max-model-len: 131072
|
|
450
375
|
--max-num-seqs: 256
|
|
451
|
-
--compilation-config: 3
|
|
452
376
|
Meta-Llama-3.1-70B:
|
|
453
377
|
model_family: Meta-Llama-3.1
|
|
454
378
|
model_variant: 70B
|
|
@@ -456,14 +380,12 @@ models:
|
|
|
456
380
|
gpus_per_node: 4
|
|
457
381
|
num_nodes: 1
|
|
458
382
|
vocab_size: 128256
|
|
459
|
-
qos: m2
|
|
460
383
|
time: 08:00:00
|
|
461
|
-
|
|
384
|
+
resource_type: l40s
|
|
462
385
|
vllm_args:
|
|
463
386
|
--tensor-parallel-size: 4
|
|
464
387
|
--max-model-len: 65536
|
|
465
388
|
--max-num-seqs: 256
|
|
466
|
-
--compilation-config: 3
|
|
467
389
|
Meta-Llama-3.1-70B-Instruct:
|
|
468
390
|
model_family: Meta-Llama-3.1
|
|
469
391
|
model_variant: 70B-Instruct
|
|
@@ -471,14 +393,12 @@ models:
|
|
|
471
393
|
gpus_per_node: 4
|
|
472
394
|
num_nodes: 1
|
|
473
395
|
vocab_size: 128256
|
|
474
|
-
qos: m2
|
|
475
396
|
time: 08:00:00
|
|
476
|
-
|
|
397
|
+
resource_type: l40s
|
|
477
398
|
vllm_args:
|
|
478
399
|
--tensor-parallel-size: 4
|
|
479
400
|
--max-model-len: 65536
|
|
480
401
|
--max-num-seqs: 256
|
|
481
|
-
--compilation-config: 3
|
|
482
402
|
Meta-Llama-3.1-405B-Instruct:
|
|
483
403
|
model_family: Meta-Llama-3.1
|
|
484
404
|
model_variant: 405B-Instruct
|
|
@@ -488,13 +408,12 @@ models:
|
|
|
488
408
|
vocab_size: 128256
|
|
489
409
|
qos: m4
|
|
490
410
|
time: 02:00:00
|
|
491
|
-
|
|
411
|
+
resource_type: l40s
|
|
492
412
|
vllm_args:
|
|
493
413
|
--pipeline-parallel-size: 8
|
|
494
414
|
--tensor-parallel-size: 4
|
|
495
415
|
--max-model-len: 16384
|
|
496
416
|
--max-num-seqs: 256
|
|
497
|
-
--compilation-config: 3
|
|
498
417
|
Mistral-7B-Instruct-v0.1:
|
|
499
418
|
model_family: Mistral
|
|
500
419
|
model_variant: 7B-Instruct-v0.1
|
|
@@ -502,13 +421,11 @@ models:
|
|
|
502
421
|
gpus_per_node: 1
|
|
503
422
|
num_nodes: 1
|
|
504
423
|
vocab_size: 32000
|
|
505
|
-
qos: m2
|
|
506
424
|
time: 08:00:00
|
|
507
|
-
|
|
425
|
+
resource_type: l40s
|
|
508
426
|
vllm_args:
|
|
509
427
|
--max-model-len: 32768
|
|
510
428
|
--max-num-seqs: 256
|
|
511
|
-
--compilation-config: 3
|
|
512
429
|
Mistral-7B-Instruct-v0.2:
|
|
513
430
|
model_family: Mistral
|
|
514
431
|
model_variant: 7B-Instruct-v0.2
|
|
@@ -516,13 +433,11 @@ models:
|
|
|
516
433
|
gpus_per_node: 1
|
|
517
434
|
num_nodes: 1
|
|
518
435
|
vocab_size: 32000
|
|
519
|
-
qos: m2
|
|
520
436
|
time: 08:00:00
|
|
521
|
-
|
|
437
|
+
resource_type: l40s
|
|
522
438
|
vllm_args:
|
|
523
439
|
--max-model-len: 32768
|
|
524
440
|
--max-num-seqs: 256
|
|
525
|
-
--compilation-config: 3
|
|
526
441
|
Mistral-7B-v0.3:
|
|
527
442
|
model_family: Mistral
|
|
528
443
|
model_variant: 7B-v0.3
|
|
@@ -530,13 +445,11 @@ models:
|
|
|
530
445
|
gpus_per_node: 1
|
|
531
446
|
num_nodes: 1
|
|
532
447
|
vocab_size: 32768
|
|
533
|
-
qos: m2
|
|
534
448
|
time: 08:00:00
|
|
535
|
-
|
|
449
|
+
resource_type: l40s
|
|
536
450
|
vllm_args:
|
|
537
451
|
--max-model-len: 32768
|
|
538
452
|
--max-num-seqs: 256
|
|
539
|
-
--compilation-config: 3
|
|
540
453
|
Mistral-7B-Instruct-v0.3:
|
|
541
454
|
model_family: Mistral
|
|
542
455
|
model_variant: 7B-Instruct-v0.3
|
|
@@ -544,13 +457,11 @@ models:
|
|
|
544
457
|
gpus_per_node: 1
|
|
545
458
|
num_nodes: 1
|
|
546
459
|
vocab_size: 32768
|
|
547
|
-
qos: m2
|
|
548
460
|
time: 08:00:00
|
|
549
|
-
|
|
461
|
+
resource_type: l40s
|
|
550
462
|
vllm_args:
|
|
551
463
|
--max-model-len: 32768
|
|
552
464
|
--max-num-seqs: 256
|
|
553
|
-
--compilation-config: 3
|
|
554
465
|
Mistral-Large-Instruct-2407:
|
|
555
466
|
model_family: Mistral
|
|
556
467
|
model_variant: Large-Instruct-2407
|
|
@@ -558,15 +469,13 @@ models:
|
|
|
558
469
|
gpus_per_node: 4
|
|
559
470
|
num_nodes: 2
|
|
560
471
|
vocab_size: 32768
|
|
561
|
-
qos: m2
|
|
562
472
|
time: 08:00:00
|
|
563
|
-
|
|
473
|
+
resource_type: l40s
|
|
564
474
|
vllm_args:
|
|
565
475
|
--pipeline-parallel-size: 2
|
|
566
476
|
--tensor-parallel-size: 4
|
|
567
477
|
--max-model-len: 32768
|
|
568
478
|
--max-num-seqs: 256
|
|
569
|
-
--compilation-config: 3
|
|
570
479
|
Mistral-Large-Instruct-2411:
|
|
571
480
|
model_family: Mistral
|
|
572
481
|
model_variant: Large-Instruct-2411
|
|
@@ -574,15 +483,13 @@ models:
|
|
|
574
483
|
gpus_per_node: 4
|
|
575
484
|
num_nodes: 2
|
|
576
485
|
vocab_size: 32768
|
|
577
|
-
qos: m2
|
|
578
486
|
time: 08:00:00
|
|
579
|
-
|
|
487
|
+
resource_type: l40s
|
|
580
488
|
vllm_args:
|
|
581
489
|
--pipeline-parallel-size: 2
|
|
582
490
|
--tensor-parallel-size: 4
|
|
583
491
|
--max-model-len: 32768
|
|
584
492
|
--max-num-seqs: 256
|
|
585
|
-
--compilation-config: 3
|
|
586
493
|
Mixtral-8x7B-Instruct-v0.1:
|
|
587
494
|
model_family: Mixtral
|
|
588
495
|
model_variant: 8x7B-Instruct-v0.1
|
|
@@ -590,14 +497,12 @@ models:
|
|
|
590
497
|
gpus_per_node: 4
|
|
591
498
|
num_nodes: 1
|
|
592
499
|
vocab_size: 32000
|
|
593
|
-
qos: m2
|
|
594
500
|
time: 08:00:00
|
|
595
|
-
|
|
501
|
+
resource_type: l40s
|
|
596
502
|
vllm_args:
|
|
597
503
|
--tensor-parallel-size: 4
|
|
598
504
|
--max-model-len: 32768
|
|
599
505
|
--max-num-seqs: 256
|
|
600
|
-
--compilation-config: 3
|
|
601
506
|
Mixtral-8x22B-v0.1:
|
|
602
507
|
model_family: Mixtral
|
|
603
508
|
model_variant: 8x22B-v0.1
|
|
@@ -605,15 +510,13 @@ models:
|
|
|
605
510
|
gpus_per_node: 4
|
|
606
511
|
num_nodes: 2
|
|
607
512
|
vocab_size: 32768
|
|
608
|
-
qos: m2
|
|
609
513
|
time: 08:00:00
|
|
610
|
-
|
|
514
|
+
resource_type: l40s
|
|
611
515
|
vllm_args:
|
|
612
516
|
--pipeline-parallel-size: 2
|
|
613
517
|
--tensor-parallel-size: 4
|
|
614
518
|
--max-model-len: 65536
|
|
615
519
|
--max-num-seqs: 256
|
|
616
|
-
--compilation-config: 3
|
|
617
520
|
Mixtral-8x22B-Instruct-v0.1:
|
|
618
521
|
model_family: Mixtral
|
|
619
522
|
model_variant: 8x22B-Instruct-v0.1
|
|
@@ -621,15 +524,13 @@ models:
|
|
|
621
524
|
gpus_per_node: 4
|
|
622
525
|
num_nodes: 2
|
|
623
526
|
vocab_size: 32768
|
|
624
|
-
qos: m2
|
|
625
527
|
time: 08:00:00
|
|
626
|
-
|
|
528
|
+
resource_type: l40s
|
|
627
529
|
vllm_args:
|
|
628
530
|
--pipeline-parallel-size: 2
|
|
629
531
|
--tensor-parallel-size: 4
|
|
630
532
|
--max-model-len: 65536
|
|
631
533
|
--max-num-seqs: 256
|
|
632
|
-
--compilation-config: 3
|
|
633
534
|
Phi-3-medium-128k-instruct:
|
|
634
535
|
model_family: Phi-3
|
|
635
536
|
model_variant: medium-128k-instruct
|
|
@@ -637,14 +538,12 @@ models:
|
|
|
637
538
|
gpus_per_node: 2
|
|
638
539
|
num_nodes: 1
|
|
639
540
|
vocab_size: 32064
|
|
640
|
-
qos: m2
|
|
641
541
|
time: 08:00:00
|
|
642
|
-
|
|
542
|
+
resource_type: l40s
|
|
643
543
|
vllm_args:
|
|
644
544
|
--tensor-parallel-size: 2
|
|
645
545
|
--max-model-len: 131072
|
|
646
546
|
--max-num-seqs: 256
|
|
647
|
-
--compilation-config: 3
|
|
648
547
|
Phi-3-vision-128k-instruct:
|
|
649
548
|
model_family: Phi-3-vision
|
|
650
549
|
model_variant: 128k-instruct
|
|
@@ -652,14 +551,12 @@ models:
|
|
|
652
551
|
gpus_per_node: 2
|
|
653
552
|
num_nodes: 1
|
|
654
553
|
vocab_size: 32064
|
|
655
|
-
qos: m2
|
|
656
554
|
time: 08:00:00
|
|
657
|
-
|
|
555
|
+
resource_type: l40s
|
|
658
556
|
vllm_args:
|
|
659
557
|
--tensor-parallel-size: 2
|
|
660
558
|
--max-model-len: 65536
|
|
661
559
|
--max-num-seqs: 256
|
|
662
|
-
--compilation-config: 3
|
|
663
560
|
Llama3-OpenBioLLM-70B:
|
|
664
561
|
model_family: Llama3-OpenBioLLM
|
|
665
562
|
model_variant: 70B
|
|
@@ -667,14 +564,12 @@ models:
|
|
|
667
564
|
gpus_per_node: 4
|
|
668
565
|
num_nodes: 1
|
|
669
566
|
vocab_size: 128256
|
|
670
|
-
qos: m2
|
|
671
567
|
time: 08:00:00
|
|
672
|
-
|
|
568
|
+
resource_type: l40s
|
|
673
569
|
vllm_args:
|
|
674
570
|
--tensor-parallel-size: 4
|
|
675
571
|
--max-model-len: 8192
|
|
676
572
|
--max-num-seqs: 256
|
|
677
|
-
--compilation-config: 3
|
|
678
573
|
Llama-3.1-Nemotron-70B-Instruct-HF:
|
|
679
574
|
model_family: Llama-3.1-Nemotron
|
|
680
575
|
model_variant: 70B-Instruct-HF
|
|
@@ -682,14 +577,12 @@ models:
|
|
|
682
577
|
gpus_per_node: 4
|
|
683
578
|
num_nodes: 1
|
|
684
579
|
vocab_size: 128256
|
|
685
|
-
qos: m2
|
|
686
580
|
time: 08:00:00
|
|
687
|
-
|
|
581
|
+
resource_type: l40s
|
|
688
582
|
vllm_args:
|
|
689
583
|
--tensor-parallel-size: 4
|
|
690
584
|
--max-model-len: 65536
|
|
691
585
|
--max-num-seqs: 256
|
|
692
|
-
--compilation-config: 3
|
|
693
586
|
Llama-3.2-1B:
|
|
694
587
|
model_family: Llama-3.2
|
|
695
588
|
model_variant: 1B
|
|
@@ -697,13 +590,11 @@ models:
|
|
|
697
590
|
gpus_per_node: 1
|
|
698
591
|
num_nodes: 1
|
|
699
592
|
vocab_size: 128256
|
|
700
|
-
qos: m2
|
|
701
593
|
time: 08:00:00
|
|
702
|
-
|
|
594
|
+
resource_type: l40s
|
|
703
595
|
vllm_args:
|
|
704
596
|
--max-model-len: 131072
|
|
705
597
|
--max-num-seqs: 256
|
|
706
|
-
--compilation-config: 3
|
|
707
598
|
Llama-3.2-1B-Instruct:
|
|
708
599
|
model_family: Llama-3.2
|
|
709
600
|
model_variant: 1B-Instruct
|
|
@@ -711,13 +602,11 @@ models:
|
|
|
711
602
|
gpus_per_node: 1
|
|
712
603
|
num_nodes: 1
|
|
713
604
|
vocab_size: 128256
|
|
714
|
-
qos: m2
|
|
715
605
|
time: 08:00:00
|
|
716
|
-
|
|
606
|
+
resource_type: l40s
|
|
717
607
|
vllm_args:
|
|
718
608
|
--max-model-len: 131072
|
|
719
609
|
--max-num-seqs: 256
|
|
720
|
-
--compilation-config: 3
|
|
721
610
|
Llama-3.2-3B:
|
|
722
611
|
model_family: Llama-3.2
|
|
723
612
|
model_variant: 3B
|
|
@@ -725,13 +614,11 @@ models:
|
|
|
725
614
|
gpus_per_node: 1
|
|
726
615
|
num_nodes: 1
|
|
727
616
|
vocab_size: 128256
|
|
728
|
-
qos: m2
|
|
729
617
|
time: 08:00:00
|
|
730
|
-
|
|
618
|
+
resource_type: l40s
|
|
731
619
|
vllm_args:
|
|
732
620
|
--max-model-len: 131072
|
|
733
621
|
--max-num-seqs: 256
|
|
734
|
-
--compilation-config: 3
|
|
735
622
|
Llama-3.2-3B-Instruct:
|
|
736
623
|
model_family: Llama-3.2
|
|
737
624
|
model_variant: 3B-Instruct
|
|
@@ -739,13 +626,11 @@ models:
|
|
|
739
626
|
gpus_per_node: 1
|
|
740
627
|
num_nodes: 1
|
|
741
628
|
vocab_size: 128256
|
|
742
|
-
qos: m2
|
|
743
629
|
time: 08:00:00
|
|
744
|
-
|
|
630
|
+
resource_type: l40s
|
|
745
631
|
vllm_args:
|
|
746
632
|
--max-model-len: 131072
|
|
747
633
|
--max-num-seqs: 256
|
|
748
|
-
--compilation-config: 3
|
|
749
634
|
Llama-3.2-11B-Vision:
|
|
750
635
|
model_family: Llama-3.2
|
|
751
636
|
model_variant: 11B-Vision
|
|
@@ -753,14 +638,12 @@ models:
|
|
|
753
638
|
gpus_per_node: 2
|
|
754
639
|
num_nodes: 1
|
|
755
640
|
vocab_size: 128256
|
|
756
|
-
qos: m2
|
|
757
641
|
time: 08:00:00
|
|
758
|
-
|
|
642
|
+
resource_type: l40s
|
|
759
643
|
vllm_args:
|
|
760
644
|
--tensor-parallel-size: 2
|
|
761
645
|
--max-model-len: 4096
|
|
762
646
|
--max-num-seqs: 64
|
|
763
|
-
--compilation-config: 3
|
|
764
647
|
--enforce-eager: true
|
|
765
648
|
Llama-3.2-11B-Vision-Instruct:
|
|
766
649
|
model_family: Llama-3.2
|
|
@@ -769,14 +652,12 @@ models:
|
|
|
769
652
|
gpus_per_node: 2
|
|
770
653
|
num_nodes: 1
|
|
771
654
|
vocab_size: 128256
|
|
772
|
-
qos: m2
|
|
773
655
|
time: 08:00:00
|
|
774
|
-
|
|
656
|
+
resource_type: l40s
|
|
775
657
|
vllm_args:
|
|
776
658
|
--tensor-parallel-size: 2
|
|
777
659
|
--max-model-len: 4096
|
|
778
660
|
--max-num-seqs: 64
|
|
779
|
-
--compilation-config: 3
|
|
780
661
|
--enforce-eager: true
|
|
781
662
|
Llama-3.2-90B-Vision:
|
|
782
663
|
model_family: Llama-3.2
|
|
@@ -785,14 +666,12 @@ models:
|
|
|
785
666
|
gpus_per_node: 4
|
|
786
667
|
num_nodes: 2
|
|
787
668
|
vocab_size: 128256
|
|
788
|
-
qos: m2
|
|
789
669
|
time: 08:00:00
|
|
790
|
-
|
|
670
|
+
resource_type: l40s
|
|
791
671
|
vllm_args:
|
|
792
672
|
--tensor-parallel-size: 8
|
|
793
673
|
--max-model-len: 4096
|
|
794
674
|
--max-num-seqs: 32
|
|
795
|
-
--compilation-config: 3
|
|
796
675
|
--enforce-eager: true
|
|
797
676
|
Llama-3.2-90B-Vision-Instruct:
|
|
798
677
|
model_family: Llama-3.2
|
|
@@ -801,14 +680,12 @@ models:
|
|
|
801
680
|
gpus_per_node: 4
|
|
802
681
|
num_nodes: 2
|
|
803
682
|
vocab_size: 128256
|
|
804
|
-
qos: m2
|
|
805
683
|
time: 08:00:00
|
|
806
|
-
|
|
684
|
+
resource_type: l40s
|
|
807
685
|
vllm_args:
|
|
808
686
|
--tensor-parallel-size: 8
|
|
809
687
|
--max-model-len: 4096
|
|
810
688
|
--max-num-seqs: 32
|
|
811
|
-
--compilation-config: 3
|
|
812
689
|
--enforce-eager: true
|
|
813
690
|
Qwen2.5-0.5B-Instruct:
|
|
814
691
|
model_family: Qwen2.5
|
|
@@ -817,13 +694,11 @@ models:
|
|
|
817
694
|
gpus_per_node: 1
|
|
818
695
|
num_nodes: 1
|
|
819
696
|
vocab_size: 152064
|
|
820
|
-
qos: m2
|
|
821
697
|
time: 08:00:00
|
|
822
|
-
|
|
698
|
+
resource_type: l40s
|
|
823
699
|
vllm_args:
|
|
824
700
|
--max-model-len: 32768
|
|
825
701
|
--max-num-seqs: 256
|
|
826
|
-
--compilation-config: 3
|
|
827
702
|
Qwen2.5-1.5B-Instruct:
|
|
828
703
|
model_family: Qwen2.5
|
|
829
704
|
model_variant: 1.5B-Instruct
|
|
@@ -831,13 +706,11 @@ models:
|
|
|
831
706
|
gpus_per_node: 1
|
|
832
707
|
num_nodes: 1
|
|
833
708
|
vocab_size: 152064
|
|
834
|
-
qos: m2
|
|
835
709
|
time: 08:00:00
|
|
836
|
-
|
|
710
|
+
resource_type: l40s
|
|
837
711
|
vllm_args:
|
|
838
712
|
--max-model-len: 32768
|
|
839
713
|
--max-num-seqs: 256
|
|
840
|
-
--compilation-config: 3
|
|
841
714
|
Qwen2.5-3B-Instruct:
|
|
842
715
|
model_family: Qwen2.5
|
|
843
716
|
model_variant: 3B-Instruct
|
|
@@ -845,13 +718,11 @@ models:
|
|
|
845
718
|
gpus_per_node: 1
|
|
846
719
|
num_nodes: 1
|
|
847
720
|
vocab_size: 152064
|
|
848
|
-
qos: m2
|
|
849
721
|
time: 08:00:00
|
|
850
|
-
|
|
722
|
+
resource_type: l40s
|
|
851
723
|
vllm_args:
|
|
852
724
|
--max-model-len: 32768
|
|
853
725
|
--max-num-seqs: 256
|
|
854
|
-
--compilation-config: 3
|
|
855
726
|
Qwen2.5-7B-Instruct:
|
|
856
727
|
model_family: Qwen2.5
|
|
857
728
|
model_variant: 7B-Instruct
|
|
@@ -859,13 +730,11 @@ models:
|
|
|
859
730
|
gpus_per_node: 1
|
|
860
731
|
num_nodes: 1
|
|
861
732
|
vocab_size: 152064
|
|
862
|
-
qos: m2
|
|
863
733
|
time: 08:00:00
|
|
864
|
-
|
|
734
|
+
resource_type: l40s
|
|
865
735
|
vllm_args:
|
|
866
736
|
--max-model-len: 32768
|
|
867
737
|
--max-num-seqs: 256
|
|
868
|
-
--compilation-config: 3
|
|
869
738
|
Qwen2.5-14B-Instruct:
|
|
870
739
|
model_family: Qwen2.5
|
|
871
740
|
model_variant: 14B-Instruct
|
|
@@ -873,13 +742,11 @@ models:
|
|
|
873
742
|
gpus_per_node: 1
|
|
874
743
|
num_nodes: 1
|
|
875
744
|
vocab_size: 152064
|
|
876
|
-
qos: m2
|
|
877
745
|
time: 08:00:00
|
|
878
|
-
|
|
746
|
+
resource_type: l40s
|
|
879
747
|
vllm_args:
|
|
880
748
|
--max-model-len: 32768
|
|
881
749
|
--max-num-seqs: 256
|
|
882
|
-
--compilation-config: 3
|
|
883
750
|
Qwen2.5-32B-Instruct:
|
|
884
751
|
model_family: Qwen2.5
|
|
885
752
|
model_variant: 32B-Instruct
|
|
@@ -887,14 +754,12 @@ models:
|
|
|
887
754
|
gpus_per_node: 2
|
|
888
755
|
num_nodes: 1
|
|
889
756
|
vocab_size: 152064
|
|
890
|
-
qos: m2
|
|
891
757
|
time: 08:00:00
|
|
892
|
-
|
|
758
|
+
resource_type: l40s
|
|
893
759
|
vllm_args:
|
|
894
760
|
--tensor-parallel-size: 2
|
|
895
761
|
--max-model-len: 32768
|
|
896
762
|
--max-num-seqs: 256
|
|
897
|
-
--compilation-config: 3
|
|
898
763
|
Qwen2.5-72B-Instruct:
|
|
899
764
|
model_family: Qwen2.5
|
|
900
765
|
model_variant: 72B-Instruct
|
|
@@ -902,14 +767,12 @@ models:
|
|
|
902
767
|
gpus_per_node: 4
|
|
903
768
|
num_nodes: 1
|
|
904
769
|
vocab_size: 152064
|
|
905
|
-
qos: m2
|
|
906
770
|
time: 08:00:00
|
|
907
|
-
|
|
771
|
+
resource_type: l40s
|
|
908
772
|
vllm_args:
|
|
909
773
|
--tensor-parallel-size: 4
|
|
910
774
|
--max-model-len: 16384
|
|
911
775
|
--max-num-seqs: 256
|
|
912
|
-
--compilation-config: 3
|
|
913
776
|
Qwen2.5-Math-1.5B-Instruct:
|
|
914
777
|
model_family: Qwen2.5
|
|
915
778
|
model_variant: Math-1.5B-Instruct
|
|
@@ -917,13 +780,11 @@ models:
|
|
|
917
780
|
gpus_per_node: 1
|
|
918
781
|
num_nodes: 1
|
|
919
782
|
vocab_size: 152064
|
|
920
|
-
qos: m2
|
|
921
783
|
time: 08:00:00
|
|
922
|
-
|
|
784
|
+
resource_type: l40s
|
|
923
785
|
vllm_args:
|
|
924
786
|
--max-model-len: 4096
|
|
925
787
|
--max-num-seqs: 256
|
|
926
|
-
--compilation-config: 3
|
|
927
788
|
Qwen2.5-Math-7B-Instruct:
|
|
928
789
|
model_family: Qwen2.5
|
|
929
790
|
model_variant: Math-7B-Instruct
|
|
@@ -931,13 +792,11 @@ models:
|
|
|
931
792
|
gpus_per_node: 1
|
|
932
793
|
num_nodes: 1
|
|
933
794
|
vocab_size: 152064
|
|
934
|
-
qos: m2
|
|
935
795
|
time: 08:00:00
|
|
936
|
-
|
|
796
|
+
resource_type: l40s
|
|
937
797
|
vllm_args:
|
|
938
798
|
--max-model-len: 4096
|
|
939
799
|
--max-num-seqs: 256
|
|
940
|
-
--compilation-config: 3
|
|
941
800
|
Qwen2.5-Math-72B-Instruct:
|
|
942
801
|
model_family: Qwen2.5
|
|
943
802
|
model_variant: Math-72B-Instruct
|
|
@@ -945,14 +804,12 @@ models:
|
|
|
945
804
|
gpus_per_node: 4
|
|
946
805
|
num_nodes: 1
|
|
947
806
|
vocab_size: 152064
|
|
948
|
-
qos: m2
|
|
949
807
|
time: 08:00:00
|
|
950
|
-
|
|
808
|
+
resource_type: l40s
|
|
951
809
|
vllm_args:
|
|
952
810
|
--tensor-parallel-size: 4
|
|
953
811
|
--max-model-len: 4096
|
|
954
812
|
--max-num-seqs: 256
|
|
955
|
-
--compilation-config: 3
|
|
956
813
|
Qwen2.5-Coder-7B-Instruct:
|
|
957
814
|
model_family: Qwen2.5
|
|
958
815
|
model_variant: Coder-7B-Instruct
|
|
@@ -960,13 +817,11 @@ models:
|
|
|
960
817
|
gpus_per_node: 1
|
|
961
818
|
num_nodes: 1
|
|
962
819
|
vocab_size: 152064
|
|
963
|
-
qos: m2
|
|
964
820
|
time: 08:00:00
|
|
965
|
-
|
|
821
|
+
resource_type: l40s
|
|
966
822
|
vllm_args:
|
|
967
823
|
--max-model-len: 32768
|
|
968
824
|
--max-num-seqs: 256
|
|
969
|
-
--compilation-config: 3
|
|
970
825
|
Qwen2.5-Math-RM-72B:
|
|
971
826
|
model_family: Qwen2.5
|
|
972
827
|
model_variant: Math-RM-72B
|
|
@@ -974,14 +829,12 @@ models:
|
|
|
974
829
|
gpus_per_node: 4
|
|
975
830
|
num_nodes: 1
|
|
976
831
|
vocab_size: 152064
|
|
977
|
-
qos: m2
|
|
978
832
|
time: 08:00:00
|
|
979
|
-
|
|
833
|
+
resource_type: l40s
|
|
980
834
|
vllm_args:
|
|
981
835
|
--tensor-parallel-size: 4
|
|
982
836
|
--max-model-len: 4096
|
|
983
837
|
--max-num-seqs: 256
|
|
984
|
-
--compilation-config: 3
|
|
985
838
|
Qwen2.5-Math-PRM-7B:
|
|
986
839
|
model_family: Qwen2.5
|
|
987
840
|
model_variant: Math-PRM-7B
|
|
@@ -989,28 +842,24 @@ models:
|
|
|
989
842
|
gpus_per_node: 1
|
|
990
843
|
num_nodes: 1
|
|
991
844
|
vocab_size: 152064
|
|
992
|
-
qos: m2
|
|
993
845
|
time: 08:00:00
|
|
994
|
-
|
|
846
|
+
resource_type: l40s
|
|
995
847
|
vllm_args:
|
|
996
848
|
--max-model-len: 4096
|
|
997
849
|
--max-num-seqs: 256
|
|
998
|
-
|
|
999
|
-
QwQ-32B-Preview:
|
|
850
|
+
QwQ-32B:
|
|
1000
851
|
model_family: QwQ
|
|
1001
|
-
model_variant: 32B
|
|
852
|
+
model_variant: 32B
|
|
1002
853
|
model_type: LLM
|
|
1003
854
|
gpus_per_node: 2
|
|
1004
855
|
num_nodes: 1
|
|
1005
856
|
vocab_size: 152064
|
|
1006
|
-
qos: m2
|
|
1007
857
|
time: 08:00:00
|
|
1008
|
-
|
|
858
|
+
resource_type: l40s
|
|
1009
859
|
vllm_args:
|
|
1010
860
|
--tensor-parallel-size: 2
|
|
1011
861
|
--max-model-len: 32768
|
|
1012
862
|
--max-num-seqs: 256
|
|
1013
|
-
--compilation-config: 3
|
|
1014
863
|
Pixtral-12B-2409:
|
|
1015
864
|
model_family: Pixtral
|
|
1016
865
|
model_variant: 12B-2409
|
|
@@ -1018,13 +867,11 @@ models:
|
|
|
1018
867
|
gpus_per_node: 1
|
|
1019
868
|
num_nodes: 1
|
|
1020
869
|
vocab_size: 131072
|
|
1021
|
-
qos: m2
|
|
1022
870
|
time: 08:00:00
|
|
1023
|
-
|
|
871
|
+
resource_type: l40s
|
|
1024
872
|
vllm_args:
|
|
1025
873
|
--max-model-len: 8192
|
|
1026
874
|
--max-num-seqs: 256
|
|
1027
|
-
--compilation-config: 3
|
|
1028
875
|
e5-mistral-7b-instruct:
|
|
1029
876
|
model_family: e5
|
|
1030
877
|
model_variant: mistral-7b-instruct
|
|
@@ -1032,13 +879,11 @@ models:
|
|
|
1032
879
|
gpus_per_node: 1
|
|
1033
880
|
num_nodes: 1
|
|
1034
881
|
vocab_size: 32000
|
|
1035
|
-
qos: m2
|
|
1036
882
|
time: 08:00:00
|
|
1037
|
-
|
|
883
|
+
resource_type: l40s
|
|
1038
884
|
vllm_args:
|
|
1039
885
|
--max-model-len: 4096
|
|
1040
886
|
--max-num-seqs: 256
|
|
1041
|
-
--compilation-config: 3
|
|
1042
887
|
bge-base-en-v1.5:
|
|
1043
888
|
model_family: bge
|
|
1044
889
|
model_variant: base-en-v1.5
|
|
@@ -1046,13 +891,11 @@ models:
|
|
|
1046
891
|
gpus_per_node: 1
|
|
1047
892
|
num_nodes: 1
|
|
1048
893
|
vocab_size: 30522
|
|
1049
|
-
qos: m2
|
|
1050
894
|
time: 08:00:00
|
|
1051
|
-
|
|
895
|
+
resource_type: l40s
|
|
1052
896
|
vllm_args:
|
|
1053
897
|
--max-model-len: 512
|
|
1054
898
|
--max-num-seqs: 256
|
|
1055
|
-
--compilation-config: 3
|
|
1056
899
|
all-MiniLM-L6-v2:
|
|
1057
900
|
model_family: all-MiniLM
|
|
1058
901
|
model_variant: L6-v2
|
|
@@ -1060,13 +903,11 @@ models:
|
|
|
1060
903
|
gpus_per_node: 1
|
|
1061
904
|
num_nodes: 1
|
|
1062
905
|
vocab_size: 30522
|
|
1063
|
-
qos: m2
|
|
1064
906
|
time: 08:00:00
|
|
1065
|
-
|
|
907
|
+
resource_type: l40s
|
|
1066
908
|
vllm_args:
|
|
1067
909
|
--max-model-len: 512
|
|
1068
910
|
--max-num-seqs: 256
|
|
1069
|
-
--compilation-config: 3
|
|
1070
911
|
Llama-3.3-70B-Instruct:
|
|
1071
912
|
model_family: Llama-3.3
|
|
1072
913
|
model_variant: 70B-Instruct
|
|
@@ -1074,14 +915,12 @@ models:
|
|
|
1074
915
|
gpus_per_node: 4
|
|
1075
916
|
num_nodes: 1
|
|
1076
917
|
vocab_size: 128256
|
|
1077
|
-
qos: m2
|
|
1078
918
|
time: 08:00:00
|
|
1079
|
-
|
|
919
|
+
resource_type: l40s
|
|
1080
920
|
vllm_args:
|
|
1081
921
|
--tensor-parallel-size: 4
|
|
1082
922
|
--max-model-len: 65536
|
|
1083
923
|
--max-num-seqs: 256
|
|
1084
|
-
--compilation-config: 3
|
|
1085
924
|
InternVL2_5-26B:
|
|
1086
925
|
model_family: InternVL2_5
|
|
1087
926
|
model_variant: 26B
|
|
@@ -1089,14 +928,12 @@ models:
|
|
|
1089
928
|
gpus_per_node: 2
|
|
1090
929
|
num_nodes: 1
|
|
1091
930
|
vocab_size: 92553
|
|
1092
|
-
qos: m2
|
|
1093
931
|
time: 08:00:00
|
|
1094
|
-
|
|
932
|
+
resource_type: l40s
|
|
1095
933
|
vllm_args:
|
|
1096
934
|
--tensor-parallel-size: 2
|
|
1097
935
|
--max-model-len: 32768
|
|
1098
936
|
--max-num-seqs: 256
|
|
1099
|
-
--compilation-config: 3
|
|
1100
937
|
InternVL2_5-38B:
|
|
1101
938
|
model_family: InternVL2_5
|
|
1102
939
|
model_variant: 38B
|
|
@@ -1104,14 +941,12 @@ models:
|
|
|
1104
941
|
gpus_per_node: 4
|
|
1105
942
|
num_nodes: 1
|
|
1106
943
|
vocab_size: 92553
|
|
1107
|
-
qos: m2
|
|
1108
944
|
time: 08:00:00
|
|
1109
|
-
|
|
945
|
+
resource_type: l40s
|
|
1110
946
|
vllm_args:
|
|
1111
947
|
--tensor-parallel-size: 4
|
|
1112
948
|
--max-model-len: 32768
|
|
1113
949
|
--max-num-seqs: 256
|
|
1114
|
-
--compilation-config: 3
|
|
1115
950
|
Aya-Expanse-32B:
|
|
1116
951
|
model_family: Aya-Expanse
|
|
1117
952
|
model_variant: 32B
|
|
@@ -1119,14 +954,12 @@ models:
|
|
|
1119
954
|
gpus_per_node: 2
|
|
1120
955
|
num_nodes: 1
|
|
1121
956
|
vocab_size: 256000
|
|
1122
|
-
qos: m2
|
|
1123
957
|
time: 08:00:00
|
|
1124
|
-
|
|
958
|
+
resource_type: l40s
|
|
1125
959
|
vllm_args:
|
|
1126
960
|
--tensor-parallel-size: 2
|
|
1127
961
|
--max-model-len: 8192
|
|
1128
962
|
--max-num-seqs: 256
|
|
1129
|
-
--compilation-config: 3
|
|
1130
963
|
DeepSeek-R1-Distill-Llama-70B:
|
|
1131
964
|
model_family: DeepSeek-R1
|
|
1132
965
|
model_variant: Distill-Llama-70B
|
|
@@ -1134,14 +967,12 @@ models:
|
|
|
1134
967
|
gpus_per_node: 4
|
|
1135
968
|
num_nodes: 1
|
|
1136
969
|
vocab_size: 128256
|
|
1137
|
-
qos: m2
|
|
1138
970
|
time: 08:00:00
|
|
1139
|
-
|
|
971
|
+
resource_type: l40s
|
|
1140
972
|
vllm_args:
|
|
1141
973
|
--tensor-parallel-size: 4
|
|
1142
974
|
--max-model-len: 65536
|
|
1143
975
|
--max-num-seqs: 256
|
|
1144
|
-
--compilation-config: 3
|
|
1145
976
|
DeepSeek-R1-Distill-Llama-8B:
|
|
1146
977
|
model_family: DeepSeek-R1
|
|
1147
978
|
model_variant: Distill-Llama-8B
|
|
@@ -1149,13 +980,11 @@ models:
|
|
|
1149
980
|
gpus_per_node: 1
|
|
1150
981
|
num_nodes: 1
|
|
1151
982
|
vocab_size: 128256
|
|
1152
|
-
qos: m2
|
|
1153
983
|
time: 08:00:00
|
|
1154
|
-
|
|
984
|
+
resource_type: l40s
|
|
1155
985
|
vllm_args:
|
|
1156
986
|
--max-model-len: 131072
|
|
1157
987
|
--max-num-seqs: 256
|
|
1158
|
-
--compilation-config: 3
|
|
1159
988
|
DeepSeek-R1-Distill-Qwen-32B:
|
|
1160
989
|
model_family: DeepSeek-R1
|
|
1161
990
|
model_variant: Distill-Qwen-32B
|
|
@@ -1163,14 +992,12 @@ models:
|
|
|
1163
992
|
gpus_per_node: 2
|
|
1164
993
|
num_nodes: 1
|
|
1165
994
|
vocab_size: 152064
|
|
1166
|
-
qos: m2
|
|
1167
995
|
time: 08:00:00
|
|
1168
|
-
|
|
996
|
+
resource_type: l40s
|
|
1169
997
|
vllm_args:
|
|
1170
998
|
--tensor-parallel-size: 2
|
|
1171
999
|
--max-model-len: 65536
|
|
1172
1000
|
--max-num-seqs: 256
|
|
1173
|
-
--compilation-config: 3
|
|
1174
1001
|
DeepSeek-R1-Distill-Qwen-14B:
|
|
1175
1002
|
model_family: DeepSeek-R1
|
|
1176
1003
|
model_variant: Distill-Qwen-14B
|
|
@@ -1178,13 +1005,11 @@ models:
|
|
|
1178
1005
|
gpus_per_node: 1
|
|
1179
1006
|
num_nodes: 1
|
|
1180
1007
|
vocab_size: 152064
|
|
1181
|
-
qos: m2
|
|
1182
1008
|
time: 08:00:00
|
|
1183
|
-
|
|
1009
|
+
resource_type: l40s
|
|
1184
1010
|
vllm_args:
|
|
1185
1011
|
--max-model-len: 65536
|
|
1186
1012
|
--max-num-seqs: 256
|
|
1187
|
-
--compilation-config: 3
|
|
1188
1013
|
DeepSeek-R1-Distill-Qwen-7B:
|
|
1189
1014
|
model_family: DeepSeek-R1
|
|
1190
1015
|
model_variant: Distill-Qwen-7B
|
|
@@ -1192,13 +1017,11 @@ models:
|
|
|
1192
1017
|
gpus_per_node: 1
|
|
1193
1018
|
num_nodes: 1
|
|
1194
1019
|
vocab_size: 152064
|
|
1195
|
-
qos: m2
|
|
1196
1020
|
time: 08:00:00
|
|
1197
|
-
|
|
1021
|
+
resource_type: l40s
|
|
1198
1022
|
vllm_args:
|
|
1199
1023
|
--max-model-len: 131072
|
|
1200
1024
|
--max-num-seqs: 256
|
|
1201
|
-
--compilation-config: 3
|
|
1202
1025
|
DeepSeek-R1-Distill-Qwen-1.5B:
|
|
1203
1026
|
model_family: DeepSeek-R1
|
|
1204
1027
|
model_variant: Distill-Qwen-1.5B
|
|
@@ -1206,13 +1029,11 @@ models:
|
|
|
1206
1029
|
gpus_per_node: 1
|
|
1207
1030
|
num_nodes: 1
|
|
1208
1031
|
vocab_size: 152064
|
|
1209
|
-
qos: m2
|
|
1210
1032
|
time: 08:00:00
|
|
1211
|
-
|
|
1033
|
+
resource_type: l40s
|
|
1212
1034
|
vllm_args:
|
|
1213
1035
|
--max-model-len: 131072
|
|
1214
1036
|
--max-num-seqs: 256
|
|
1215
|
-
--compilation-config: 3
|
|
1216
1037
|
Phi-3.5-vision-instruct:
|
|
1217
1038
|
model_family: Phi-3.5-vision
|
|
1218
1039
|
model_variant: instruct
|
|
@@ -1220,14 +1041,12 @@ models:
|
|
|
1220
1041
|
gpus_per_node: 2
|
|
1221
1042
|
num_nodes: 1
|
|
1222
1043
|
vocab_size: 32064
|
|
1223
|
-
qos: m2
|
|
1224
1044
|
time: 08:00:00
|
|
1225
|
-
|
|
1045
|
+
resource_type: l40s
|
|
1226
1046
|
vllm_args:
|
|
1227
1047
|
--tensor-parallel-size: 2
|
|
1228
1048
|
--max-model-len: 65536
|
|
1229
1049
|
--max-num-seqs: 256
|
|
1230
|
-
--compilation-config: 3
|
|
1231
1050
|
InternVL2_5-8B:
|
|
1232
1051
|
model_family: InternVL2_5
|
|
1233
1052
|
model_variant: 8B
|
|
@@ -1235,13 +1054,11 @@ models:
|
|
|
1235
1054
|
gpus_per_node: 1
|
|
1236
1055
|
num_nodes: 1
|
|
1237
1056
|
vocab_size: 92553
|
|
1238
|
-
qos: m2
|
|
1239
1057
|
time: 08:00:00
|
|
1240
|
-
|
|
1058
|
+
resource_type: l40s
|
|
1241
1059
|
vllm_args:
|
|
1242
1060
|
--max-model-len: 32768
|
|
1243
1061
|
--max-num-seqs: 256
|
|
1244
|
-
--compilation-config: 3
|
|
1245
1062
|
glm-4v-9b:
|
|
1246
1063
|
model_family: glm-4v
|
|
1247
1064
|
model_variant: 9b
|
|
@@ -1249,13 +1066,11 @@ models:
|
|
|
1249
1066
|
gpus_per_node: 1
|
|
1250
1067
|
num_nodes: 1
|
|
1251
1068
|
vocab_size: 151552
|
|
1252
|
-
qos: m2
|
|
1253
1069
|
time: 08:00:00
|
|
1254
|
-
|
|
1070
|
+
resource_type: l40s
|
|
1255
1071
|
vllm_args:
|
|
1256
1072
|
--max-model-len: 8192
|
|
1257
1073
|
--max-num-seqs: 256
|
|
1258
|
-
--compilation-config: 3
|
|
1259
1074
|
Molmo-7B-D-0924:
|
|
1260
1075
|
model_family: Molmo
|
|
1261
1076
|
model_variant: 7B-D-0924
|
|
@@ -1263,27 +1078,23 @@ models:
|
|
|
1263
1078
|
gpus_per_node: 1
|
|
1264
1079
|
num_nodes: 1
|
|
1265
1080
|
vocab_size: 152064
|
|
1266
|
-
qos: m2
|
|
1267
1081
|
time: 08:00:00
|
|
1268
|
-
|
|
1082
|
+
resource_type: l40s
|
|
1269
1083
|
vllm_args:
|
|
1270
1084
|
--max-model-len: 4096
|
|
1271
1085
|
--max-num-seqs: 256
|
|
1272
|
-
--compilation-config: 3
|
|
1273
1086
|
deepseek-vl2:
|
|
1274
1087
|
model_family: deepseek-vl2
|
|
1275
1088
|
model_type: VLM
|
|
1276
1089
|
gpus_per_node: 2
|
|
1277
1090
|
num_nodes: 1
|
|
1278
1091
|
vocab_size: 129280
|
|
1279
|
-
qos: m2
|
|
1280
1092
|
time: 08:00:00
|
|
1281
|
-
|
|
1093
|
+
resource_type: l40s
|
|
1282
1094
|
vllm_args:
|
|
1283
1095
|
--tensor-parallel-size: 2
|
|
1284
1096
|
--max-model-len: 4096
|
|
1285
1097
|
--max-num-seqs: 256
|
|
1286
|
-
--compilation-config: 3
|
|
1287
1098
|
deepseek-vl2-small:
|
|
1288
1099
|
model_family: deepseek-vl2
|
|
1289
1100
|
model_variant: small
|
|
@@ -1291,10 +1102,20 @@ models:
|
|
|
1291
1102
|
gpus_per_node: 1
|
|
1292
1103
|
num_nodes: 1
|
|
1293
1104
|
vocab_size: 129280
|
|
1294
|
-
qos: m2
|
|
1295
1105
|
time: 08:00:00
|
|
1296
|
-
|
|
1106
|
+
resource_type: l40s
|
|
1297
1107
|
vllm_args:
|
|
1298
1108
|
--max-model-len: 4096
|
|
1299
1109
|
--max-num-seqs: 256
|
|
1300
|
-
|
|
1110
|
+
Qwen3-14B:
|
|
1111
|
+
model_family: Qwen3
|
|
1112
|
+
model_variant: 14B
|
|
1113
|
+
model_type: LLM
|
|
1114
|
+
gpus_per_node: 1
|
|
1115
|
+
num_nodes: 1
|
|
1116
|
+
vocab_size: 151936
|
|
1117
|
+
time: 08:00:00
|
|
1118
|
+
resource_type: l40s
|
|
1119
|
+
vllm_args:
|
|
1120
|
+
--max-model-len: 40960
|
|
1121
|
+
--max-num-seqs: 256
|