vec-inf 0.6.1__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/README.md +18 -4
- vec_inf/cli/_cli.py +191 -34
- vec_inf/cli/_helper.py +95 -14
- vec_inf/client/_client_vars.py +7 -165
- vec_inf/client/_helper.py +386 -40
- vec_inf/client/_slurm_script_generator.py +204 -36
- vec_inf/client/_slurm_templates.py +248 -0
- vec_inf/client/_slurm_vars.py +86 -0
- vec_inf/client/_utils.py +189 -70
- vec_inf/client/api.py +96 -25
- vec_inf/client/config.py +40 -19
- vec_inf/client/models.py +44 -4
- vec_inf/config/README.md +4 -243
- vec_inf/config/environment.yaml +35 -0
- vec_inf/config/models.yaml +102 -274
- {vec_inf-0.6.1.dist-info → vec_inf-0.7.1.dist-info}/METADATA +43 -73
- vec_inf-0.7.1.dist-info/RECORD +27 -0
- vec_inf/client/slurm_vars.py +0 -49
- vec_inf-0.6.1.dist-info/RECORD +0 -25
- {vec_inf-0.6.1.dist-info → vec_inf-0.7.1.dist-info}/WHEEL +0 -0
- {vec_inf-0.6.1.dist-info → vec_inf-0.7.1.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.6.1.dist-info → vec_inf-0.7.1.dist-info}/licenses/LICENSE +0 -0
vec_inf/config/models.yaml
CHANGED
|
@@ -1,19 +1,4 @@
|
|
|
1
1
|
models:
|
|
2
|
-
c4ai-command-r-plus:
|
|
3
|
-
model_family: c4ai-command-r
|
|
4
|
-
model_variant: plus
|
|
5
|
-
model_type: LLM
|
|
6
|
-
gpus_per_node: 4
|
|
7
|
-
num_nodes: 2
|
|
8
|
-
vocab_size: 256000
|
|
9
|
-
qos: m2
|
|
10
|
-
time: 08:00:00
|
|
11
|
-
partition: a40
|
|
12
|
-
vllm_args:
|
|
13
|
-
--pipeline-parallel-size: 2
|
|
14
|
-
--tensor-parallel-size: 4
|
|
15
|
-
--max-model-len: 8192
|
|
16
|
-
--max-num-seqs: 256
|
|
17
2
|
c4ai-command-r-plus-08-2024:
|
|
18
3
|
model_family: c4ai-command-r
|
|
19
4
|
model_variant: plus-08-2024
|
|
@@ -21,9 +6,8 @@ models:
|
|
|
21
6
|
gpus_per_node: 4
|
|
22
7
|
num_nodes: 2
|
|
23
8
|
vocab_size: 256000
|
|
24
|
-
qos: m2
|
|
25
9
|
time: 08:00:00
|
|
26
|
-
|
|
10
|
+
resource_type: l40s
|
|
27
11
|
vllm_args:
|
|
28
12
|
--pipeline-parallel-size: 2
|
|
29
13
|
--tensor-parallel-size: 4
|
|
@@ -36,14 +20,12 @@ models:
|
|
|
36
20
|
gpus_per_node: 2
|
|
37
21
|
num_nodes: 1
|
|
38
22
|
vocab_size: 256000
|
|
39
|
-
qos: m2
|
|
40
23
|
time: 08:00:00
|
|
41
|
-
|
|
24
|
+
resource_type: l40s
|
|
42
25
|
vllm_args:
|
|
43
26
|
--tensor-parallel-size: 2
|
|
44
27
|
--max-model-len: 32768
|
|
45
28
|
--max-num-seqs: 256
|
|
46
|
-
--compilation-config: 3
|
|
47
29
|
CodeLlama-7b-hf:
|
|
48
30
|
model_family: CodeLlama
|
|
49
31
|
model_variant: 7b-hf
|
|
@@ -51,13 +33,11 @@ models:
|
|
|
51
33
|
gpus_per_node: 1
|
|
52
34
|
num_nodes: 1
|
|
53
35
|
vocab_size: 32000
|
|
54
|
-
qos: m2
|
|
55
36
|
time: 08:00:00
|
|
56
|
-
|
|
37
|
+
resource_type: l40s
|
|
57
38
|
vllm_args:
|
|
58
39
|
--max-model-len: 16384
|
|
59
40
|
--max-num-seqs: 256
|
|
60
|
-
--compilation-config: 3
|
|
61
41
|
CodeLlama-7b-Instruct-hf:
|
|
62
42
|
model_family: CodeLlama
|
|
63
43
|
model_variant: 7b-Instruct-hf
|
|
@@ -65,13 +45,11 @@ models:
|
|
|
65
45
|
gpus_per_node: 1
|
|
66
46
|
num_nodes: 1
|
|
67
47
|
vocab_size: 32000
|
|
68
|
-
qos: m2
|
|
69
48
|
time: 08:00:00
|
|
70
|
-
|
|
49
|
+
resource_type: l40s
|
|
71
50
|
vllm_args:
|
|
72
51
|
--max-model-len: 16384
|
|
73
52
|
--max-num-seqs: 256
|
|
74
|
-
--compilation-config: 3
|
|
75
53
|
CodeLlama-13b-hf:
|
|
76
54
|
model_family: CodeLlama
|
|
77
55
|
model_variant: 13b-hf
|
|
@@ -79,13 +57,11 @@ models:
|
|
|
79
57
|
gpus_per_node: 1
|
|
80
58
|
num_nodes: 1
|
|
81
59
|
vocab_size: 32000
|
|
82
|
-
qos: m2
|
|
83
60
|
time: 08:00:00
|
|
84
|
-
|
|
61
|
+
resource_type: l40s
|
|
85
62
|
vllm_args:
|
|
86
63
|
--max-model-len: 16384
|
|
87
64
|
--max-num-seqs: 256
|
|
88
|
-
--compilation-config: 3
|
|
89
65
|
CodeLlama-13b-Instruct-hf:
|
|
90
66
|
model_family: CodeLlama
|
|
91
67
|
model_variant: 13b-Instruct-hf
|
|
@@ -93,13 +69,11 @@ models:
|
|
|
93
69
|
gpus_per_node: 1
|
|
94
70
|
num_nodes: 1
|
|
95
71
|
vocab_size: 32000
|
|
96
|
-
qos: m2
|
|
97
72
|
time: 08:00:00
|
|
98
|
-
|
|
73
|
+
resource_type: l40s
|
|
99
74
|
vllm_args:
|
|
100
75
|
--max-model-len: 16384
|
|
101
76
|
--max-num-seqs: 256
|
|
102
|
-
--compilation-config: 3
|
|
103
77
|
CodeLlama-34b-hf:
|
|
104
78
|
model_family: CodeLlama
|
|
105
79
|
model_variant: 34b-hf
|
|
@@ -107,14 +81,12 @@ models:
|
|
|
107
81
|
gpus_per_node: 2
|
|
108
82
|
num_nodes: 1
|
|
109
83
|
vocab_size: 32000
|
|
110
|
-
qos: m2
|
|
111
84
|
time: 08:00:00
|
|
112
|
-
|
|
85
|
+
resource_type: l40s
|
|
113
86
|
vllm_args:
|
|
114
87
|
--tensor-parallel-size: 2
|
|
115
88
|
--max-model-len: 16384
|
|
116
89
|
--max-num-seqs: 256
|
|
117
|
-
--compilation-config: 3
|
|
118
90
|
CodeLlama-34b-Instruct-hf:
|
|
119
91
|
model_family: CodeLlama
|
|
120
92
|
model_variant: 34b-Instruct-hf
|
|
@@ -122,14 +94,12 @@ models:
|
|
|
122
94
|
gpus_per_node: 2
|
|
123
95
|
num_nodes: 1
|
|
124
96
|
vocab_size: 32000
|
|
125
|
-
qos: m2
|
|
126
97
|
time: 08:00:00
|
|
127
|
-
|
|
98
|
+
resource_type: l40s
|
|
128
99
|
vllm_args:
|
|
129
100
|
--tensor-parallel-size: 2
|
|
130
101
|
--max-model-len: 16384
|
|
131
102
|
--max-num-seqs: 256
|
|
132
|
-
--compilation-config: 3
|
|
133
103
|
CodeLlama-70b-hf:
|
|
134
104
|
model_family: CodeLlama
|
|
135
105
|
model_variant: 70b-hf
|
|
@@ -137,14 +107,12 @@ models:
|
|
|
137
107
|
gpus_per_node: 4
|
|
138
108
|
num_nodes: 1
|
|
139
109
|
vocab_size: 32016
|
|
140
|
-
qos: m2
|
|
141
110
|
time: 08:00:00
|
|
142
|
-
|
|
111
|
+
resource_type: l40s
|
|
143
112
|
vllm_args:
|
|
144
113
|
--tensor-parallel-size: 4
|
|
145
114
|
--max-model-len: 4096
|
|
146
115
|
--max-num-seqs: 256
|
|
147
|
-
--compilation-config: 3
|
|
148
116
|
CodeLlama-70b-Instruct-hf:
|
|
149
117
|
model_family: CodeLlama
|
|
150
118
|
model_variant: 70b-Instruct-hf
|
|
@@ -152,14 +120,12 @@ models:
|
|
|
152
120
|
gpus_per_node: 4
|
|
153
121
|
num_nodes: 1
|
|
154
122
|
vocab_size: 32016
|
|
155
|
-
qos: m2
|
|
156
123
|
time: 08:00:00
|
|
157
|
-
|
|
124
|
+
resource_type: l40s
|
|
158
125
|
vllm_args:
|
|
159
126
|
--tensor-parallel-size: 4
|
|
160
127
|
--max-model-len: 4096
|
|
161
128
|
--max-num-seqs: 256
|
|
162
|
-
--compilation-config: 3
|
|
163
129
|
gemma-2-9b:
|
|
164
130
|
model_family: gemma-2
|
|
165
131
|
model_variant: 9b
|
|
@@ -167,13 +133,11 @@ models:
|
|
|
167
133
|
gpus_per_node: 1
|
|
168
134
|
num_nodes: 1
|
|
169
135
|
vocab_size: 256000
|
|
170
|
-
qos: m2
|
|
171
136
|
time: 08:00:00
|
|
172
|
-
|
|
137
|
+
resource_type: l40s
|
|
173
138
|
vllm_args:
|
|
174
139
|
--max-model-len: 4096
|
|
175
140
|
--max-num-seqs: 256
|
|
176
|
-
--compilation-config: 3
|
|
177
141
|
gemma-2-9b-it:
|
|
178
142
|
model_family: gemma-2
|
|
179
143
|
model_variant: 9b-it
|
|
@@ -181,13 +145,11 @@ models:
|
|
|
181
145
|
gpus_per_node: 1
|
|
182
146
|
num_nodes: 1
|
|
183
147
|
vocab_size: 256000
|
|
184
|
-
qos: m2
|
|
185
148
|
time: 08:00:00
|
|
186
|
-
|
|
149
|
+
resource_type: l40s
|
|
187
150
|
vllm_args:
|
|
188
151
|
--max-model-len: 4096
|
|
189
152
|
--max-num-seqs: 256
|
|
190
|
-
--compilation-config: 3
|
|
191
153
|
gemma-2-27b:
|
|
192
154
|
model_family: gemma-2
|
|
193
155
|
model_variant: 27b
|
|
@@ -195,14 +157,12 @@ models:
|
|
|
195
157
|
gpus_per_node: 2
|
|
196
158
|
num_nodes: 1
|
|
197
159
|
vocab_size: 256000
|
|
198
|
-
qos: m2
|
|
199
160
|
time: 08:00:00
|
|
200
|
-
|
|
161
|
+
resource_type: l40s
|
|
201
162
|
vllm_args:
|
|
202
163
|
--tensor-parallel-size: 2
|
|
203
164
|
--max-model-len: 4096
|
|
204
165
|
--max-num-seqs: 256
|
|
205
|
-
--compilation-config: 3
|
|
206
166
|
gemma-2-27b-it:
|
|
207
167
|
model_family: gemma-2
|
|
208
168
|
model_variant: 27b-it
|
|
@@ -210,14 +170,12 @@ models:
|
|
|
210
170
|
gpus_per_node: 2
|
|
211
171
|
num_nodes: 1
|
|
212
172
|
vocab_size: 256000
|
|
213
|
-
qos: m2
|
|
214
173
|
time: 08:00:00
|
|
215
|
-
|
|
174
|
+
resource_type: l40s
|
|
216
175
|
vllm_args:
|
|
217
176
|
--tensor-parallel-size: 2
|
|
218
177
|
--max-model-len: 4096
|
|
219
178
|
--max-num-seqs: 256
|
|
220
|
-
--compilation-config: 3
|
|
221
179
|
Llama-2-7b-hf:
|
|
222
180
|
model_family: Llama-2
|
|
223
181
|
model_variant: 7b-hf
|
|
@@ -225,13 +183,11 @@ models:
|
|
|
225
183
|
gpus_per_node: 1
|
|
226
184
|
num_nodes: 1
|
|
227
185
|
vocab_size: 32000
|
|
228
|
-
qos: m2
|
|
229
186
|
time: 08:00:00
|
|
230
|
-
|
|
187
|
+
resource_type: l40s
|
|
231
188
|
vllm_args:
|
|
232
189
|
--max-model-len: 4096
|
|
233
190
|
--max-num-seqs: 256
|
|
234
|
-
--compilation-config: 3
|
|
235
191
|
Llama-2-7b-chat-hf:
|
|
236
192
|
model_family: Llama-2
|
|
237
193
|
model_variant: 7b-chat-hf
|
|
@@ -239,13 +195,11 @@ models:
|
|
|
239
195
|
gpus_per_node: 1
|
|
240
196
|
num_nodes: 1
|
|
241
197
|
vocab_size: 32000
|
|
242
|
-
qos: m2
|
|
243
198
|
time: 08:00:00
|
|
244
|
-
|
|
199
|
+
resource_type: l40s
|
|
245
200
|
vllm_args:
|
|
246
201
|
--max-model-len: 4096
|
|
247
202
|
--max-num-seqs: 256
|
|
248
|
-
--compilation-config: 3
|
|
249
203
|
Llama-2-13b-hf:
|
|
250
204
|
model_family: Llama-2
|
|
251
205
|
model_variant: 13b-hf
|
|
@@ -253,13 +207,11 @@ models:
|
|
|
253
207
|
gpus_per_node: 1
|
|
254
208
|
num_nodes: 1
|
|
255
209
|
vocab_size: 32000
|
|
256
|
-
qos: m2
|
|
257
210
|
time: 08:00:00
|
|
258
|
-
|
|
211
|
+
resource_type: l40s
|
|
259
212
|
vllm_args:
|
|
260
213
|
--max-model-len: 4096
|
|
261
214
|
--max-num-seqs: 256
|
|
262
|
-
--compilation-config: 3
|
|
263
215
|
Llama-2-13b-chat-hf:
|
|
264
216
|
model_family: Llama-2
|
|
265
217
|
model_variant: 13b-chat-hf
|
|
@@ -267,13 +219,11 @@ models:
|
|
|
267
219
|
gpus_per_node: 1
|
|
268
220
|
num_nodes: 1
|
|
269
221
|
vocab_size: 32000
|
|
270
|
-
qos: m2
|
|
271
222
|
time: 08:00:00
|
|
272
|
-
|
|
223
|
+
resource_type: l40s
|
|
273
224
|
vllm_args:
|
|
274
225
|
--max-model-len: 4096
|
|
275
226
|
--max-num-seqs: 256
|
|
276
|
-
--compilation-config: 3
|
|
277
227
|
Llama-2-70b-hf:
|
|
278
228
|
model_family: Llama-2
|
|
279
229
|
model_variant: 70b-hf
|
|
@@ -281,14 +231,12 @@ models:
|
|
|
281
231
|
gpus_per_node: 4
|
|
282
232
|
num_nodes: 1
|
|
283
233
|
vocab_size: 32000
|
|
284
|
-
qos: m2
|
|
285
234
|
time: 08:00:00
|
|
286
|
-
|
|
235
|
+
resource_type: l40s
|
|
287
236
|
vllm_args:
|
|
288
237
|
--tensor-parallel-size: 4
|
|
289
238
|
--max-model-len: 4096
|
|
290
239
|
--max-num-seqs: 256
|
|
291
|
-
--compilation-config: 3
|
|
292
240
|
Llama-2-70b-chat-hf:
|
|
293
241
|
model_family: Llama-2
|
|
294
242
|
model_variant: 70b-chat-hf
|
|
@@ -296,14 +244,12 @@ models:
|
|
|
296
244
|
gpus_per_node: 4
|
|
297
245
|
num_nodes: 1
|
|
298
246
|
vocab_size: 32000
|
|
299
|
-
qos: m2
|
|
300
247
|
time: 08:00:00
|
|
301
|
-
|
|
248
|
+
resource_type: l40s
|
|
302
249
|
vllm_args:
|
|
303
250
|
--tensor-parallel-size: 4
|
|
304
251
|
--max-model-len: 4096
|
|
305
252
|
--max-num-seqs: 256
|
|
306
|
-
--compilation-config: 3
|
|
307
253
|
llava-1.5-7b-hf:
|
|
308
254
|
model_family: llava-1.5
|
|
309
255
|
model_variant: 7b-hf
|
|
@@ -311,13 +257,11 @@ models:
|
|
|
311
257
|
gpus_per_node: 1
|
|
312
258
|
num_nodes: 1
|
|
313
259
|
vocab_size: 32000
|
|
314
|
-
qos: m2
|
|
315
260
|
time: 08:00:00
|
|
316
|
-
|
|
261
|
+
resource_type: l40s
|
|
317
262
|
vllm_args:
|
|
318
263
|
--max-model-len: 4096
|
|
319
264
|
--max-num-seqs: 256
|
|
320
|
-
--compilation-config: 3
|
|
321
265
|
llava-1.5-13b-hf:
|
|
322
266
|
model_family: llava-1.5
|
|
323
267
|
model_variant: 13b-hf
|
|
@@ -325,13 +269,11 @@ models:
|
|
|
325
269
|
gpus_per_node: 1
|
|
326
270
|
num_nodes: 1
|
|
327
271
|
vocab_size: 32000
|
|
328
|
-
qos: m2
|
|
329
272
|
time: 08:00:00
|
|
330
|
-
|
|
273
|
+
resource_type: l40s
|
|
331
274
|
vllm_args:
|
|
332
275
|
--max-model-len: 4096
|
|
333
276
|
--max-num-seqs: 256
|
|
334
|
-
--compilation-config: 3
|
|
335
277
|
llava-v1.6-mistral-7b-hf:
|
|
336
278
|
model_family: llava-v1.6
|
|
337
279
|
model_variant: mistral-7b-hf
|
|
@@ -339,13 +281,11 @@ models:
|
|
|
339
281
|
gpus_per_node: 1
|
|
340
282
|
num_nodes: 1
|
|
341
283
|
vocab_size: 32064
|
|
342
|
-
qos: m2
|
|
343
284
|
time: 08:00:00
|
|
344
|
-
|
|
285
|
+
resource_type: l40s
|
|
345
286
|
vllm_args:
|
|
346
287
|
--max-model-len: 32768
|
|
347
288
|
--max-num-seqs: 256
|
|
348
|
-
--compilation-config: 3
|
|
349
289
|
llava-v1.6-34b-hf:
|
|
350
290
|
model_family: llava-v1.6
|
|
351
291
|
model_variant: 34b-hf
|
|
@@ -353,14 +293,12 @@ models:
|
|
|
353
293
|
gpus_per_node: 2
|
|
354
294
|
num_nodes: 1
|
|
355
295
|
vocab_size: 64064
|
|
356
|
-
qos: m2
|
|
357
296
|
time: 08:00:00
|
|
358
|
-
|
|
297
|
+
resource_type: l40s
|
|
359
298
|
vllm_args:
|
|
360
299
|
--tensor-parallel-size: 2
|
|
361
300
|
--max-model-len: 4096
|
|
362
301
|
--max-num-seqs: 256
|
|
363
|
-
--compilation-config: 3
|
|
364
302
|
Meta-Llama-3-8B:
|
|
365
303
|
model_family: Meta-Llama-3
|
|
366
304
|
model_variant: 8B
|
|
@@ -368,13 +306,11 @@ models:
|
|
|
368
306
|
gpus_per_node: 1
|
|
369
307
|
num_nodes: 1
|
|
370
308
|
vocab_size: 128256
|
|
371
|
-
qos: m2
|
|
372
309
|
time: 08:00:00
|
|
373
|
-
|
|
310
|
+
resource_type: l40s
|
|
374
311
|
vllm_args:
|
|
375
312
|
--max-model-len: 8192
|
|
376
313
|
--max-num-seqs: 256
|
|
377
|
-
--compilation-config: 3
|
|
378
314
|
Meta-Llama-3-8B-Instruct:
|
|
379
315
|
model_family: Meta-Llama-3
|
|
380
316
|
model_variant: 8B-Instruct
|
|
@@ -382,13 +318,11 @@ models:
|
|
|
382
318
|
gpus_per_node: 1
|
|
383
319
|
num_nodes: 1
|
|
384
320
|
vocab_size: 128256
|
|
385
|
-
qos: m2
|
|
386
321
|
time: 08:00:00
|
|
387
|
-
|
|
322
|
+
resource_type: l40s
|
|
388
323
|
vllm_args:
|
|
389
324
|
--max-model-len: 8192
|
|
390
325
|
--max-num-seqs: 256
|
|
391
|
-
--compilation-config: 3
|
|
392
326
|
Meta-Llama-3-70B:
|
|
393
327
|
model_family: Meta-Llama-3
|
|
394
328
|
model_variant: 70B
|
|
@@ -396,14 +330,12 @@ models:
|
|
|
396
330
|
gpus_per_node: 4
|
|
397
331
|
num_nodes: 1
|
|
398
332
|
vocab_size: 128256
|
|
399
|
-
qos: m2
|
|
400
333
|
time: 08:00:00
|
|
401
|
-
|
|
334
|
+
resource_type: l40s
|
|
402
335
|
vllm_args:
|
|
403
336
|
--tensor-parallel-size: 4
|
|
404
337
|
--max-model-len: 8192
|
|
405
338
|
--max-num-seqs: 256
|
|
406
|
-
--compilation-config: 3
|
|
407
339
|
Meta-Llama-3-70B-Instruct:
|
|
408
340
|
model_family: Meta-Llama-3
|
|
409
341
|
model_variant: 70B-Instruct
|
|
@@ -411,14 +343,12 @@ models:
|
|
|
411
343
|
gpus_per_node: 4
|
|
412
344
|
num_nodes: 1
|
|
413
345
|
vocab_size: 128256
|
|
414
|
-
qos: m2
|
|
415
346
|
time: 08:00:00
|
|
416
|
-
|
|
347
|
+
resource_type: l40s
|
|
417
348
|
vllm_args:
|
|
418
349
|
--tensor-parallel-size: 4
|
|
419
350
|
--max-model-len: 8192
|
|
420
351
|
--max-num-seqs: 256
|
|
421
|
-
--compilation-config: 3
|
|
422
352
|
Meta-Llama-3.1-8B:
|
|
423
353
|
model_family: Meta-Llama-3.1
|
|
424
354
|
model_variant: 8B
|
|
@@ -426,13 +356,11 @@ models:
|
|
|
426
356
|
gpus_per_node: 1
|
|
427
357
|
num_nodes: 1
|
|
428
358
|
vocab_size: 128256
|
|
429
|
-
qos: m2
|
|
430
359
|
time: 08:00:00
|
|
431
|
-
|
|
360
|
+
resource_type: l40s
|
|
432
361
|
vllm_args:
|
|
433
362
|
--max-model-len: 131072
|
|
434
363
|
--max-num-seqs: 256
|
|
435
|
-
--compilation-config: 3
|
|
436
364
|
Meta-Llama-3.1-8B-Instruct:
|
|
437
365
|
model_family: Meta-Llama-3.1
|
|
438
366
|
model_variant: 8B-Instruct
|
|
@@ -440,13 +368,11 @@ models:
|
|
|
440
368
|
gpus_per_node: 1
|
|
441
369
|
num_nodes: 1
|
|
442
370
|
vocab_size: 128256
|
|
443
|
-
qos: m2
|
|
444
371
|
time: 08:00:00
|
|
445
|
-
|
|
372
|
+
resource_type: l40s
|
|
446
373
|
vllm_args:
|
|
447
374
|
--max-model-len: 131072
|
|
448
375
|
--max-num-seqs: 256
|
|
449
|
-
--compilation-config: 3
|
|
450
376
|
Meta-Llama-3.1-70B:
|
|
451
377
|
model_family: Meta-Llama-3.1
|
|
452
378
|
model_variant: 70B
|
|
@@ -454,14 +380,12 @@ models:
|
|
|
454
380
|
gpus_per_node: 4
|
|
455
381
|
num_nodes: 1
|
|
456
382
|
vocab_size: 128256
|
|
457
|
-
qos: m2
|
|
458
383
|
time: 08:00:00
|
|
459
|
-
|
|
384
|
+
resource_type: l40s
|
|
460
385
|
vllm_args:
|
|
461
386
|
--tensor-parallel-size: 4
|
|
462
387
|
--max-model-len: 65536
|
|
463
388
|
--max-num-seqs: 256
|
|
464
|
-
--compilation-config: 3
|
|
465
389
|
Meta-Llama-3.1-70B-Instruct:
|
|
466
390
|
model_family: Meta-Llama-3.1
|
|
467
391
|
model_variant: 70B-Instruct
|
|
@@ -469,14 +393,12 @@ models:
|
|
|
469
393
|
gpus_per_node: 4
|
|
470
394
|
num_nodes: 1
|
|
471
395
|
vocab_size: 128256
|
|
472
|
-
qos: m2
|
|
473
396
|
time: 08:00:00
|
|
474
|
-
|
|
397
|
+
resource_type: l40s
|
|
475
398
|
vllm_args:
|
|
476
399
|
--tensor-parallel-size: 4
|
|
477
400
|
--max-model-len: 65536
|
|
478
401
|
--max-num-seqs: 256
|
|
479
|
-
--compilation-config: 3
|
|
480
402
|
Meta-Llama-3.1-405B-Instruct:
|
|
481
403
|
model_family: Meta-Llama-3.1
|
|
482
404
|
model_variant: 405B-Instruct
|
|
@@ -486,7 +408,7 @@ models:
|
|
|
486
408
|
vocab_size: 128256
|
|
487
409
|
qos: m4
|
|
488
410
|
time: 02:00:00
|
|
489
|
-
|
|
411
|
+
resource_type: l40s
|
|
490
412
|
vllm_args:
|
|
491
413
|
--pipeline-parallel-size: 8
|
|
492
414
|
--tensor-parallel-size: 4
|
|
@@ -499,13 +421,11 @@ models:
|
|
|
499
421
|
gpus_per_node: 1
|
|
500
422
|
num_nodes: 1
|
|
501
423
|
vocab_size: 32000
|
|
502
|
-
qos: m2
|
|
503
424
|
time: 08:00:00
|
|
504
|
-
|
|
425
|
+
resource_type: l40s
|
|
505
426
|
vllm_args:
|
|
506
427
|
--max-model-len: 32768
|
|
507
428
|
--max-num-seqs: 256
|
|
508
|
-
--compilation-config: 3
|
|
509
429
|
Mistral-7B-Instruct-v0.2:
|
|
510
430
|
model_family: Mistral
|
|
511
431
|
model_variant: 7B-Instruct-v0.2
|
|
@@ -513,13 +433,11 @@ models:
|
|
|
513
433
|
gpus_per_node: 1
|
|
514
434
|
num_nodes: 1
|
|
515
435
|
vocab_size: 32000
|
|
516
|
-
qos: m2
|
|
517
436
|
time: 08:00:00
|
|
518
|
-
|
|
437
|
+
resource_type: l40s
|
|
519
438
|
vllm_args:
|
|
520
439
|
--max-model-len: 32768
|
|
521
440
|
--max-num-seqs: 256
|
|
522
|
-
--compilation-config: 3
|
|
523
441
|
Mistral-7B-v0.3:
|
|
524
442
|
model_family: Mistral
|
|
525
443
|
model_variant: 7B-v0.3
|
|
@@ -527,13 +445,11 @@ models:
|
|
|
527
445
|
gpus_per_node: 1
|
|
528
446
|
num_nodes: 1
|
|
529
447
|
vocab_size: 32768
|
|
530
|
-
qos: m2
|
|
531
448
|
time: 08:00:00
|
|
532
|
-
|
|
449
|
+
resource_type: l40s
|
|
533
450
|
vllm_args:
|
|
534
451
|
--max-model-len: 32768
|
|
535
452
|
--max-num-seqs: 256
|
|
536
|
-
--compilation-config: 3
|
|
537
453
|
Mistral-7B-Instruct-v0.3:
|
|
538
454
|
model_family: Mistral
|
|
539
455
|
model_variant: 7B-Instruct-v0.3
|
|
@@ -541,13 +457,11 @@ models:
|
|
|
541
457
|
gpus_per_node: 1
|
|
542
458
|
num_nodes: 1
|
|
543
459
|
vocab_size: 32768
|
|
544
|
-
qos: m2
|
|
545
460
|
time: 08:00:00
|
|
546
|
-
|
|
461
|
+
resource_type: l40s
|
|
547
462
|
vllm_args:
|
|
548
463
|
--max-model-len: 32768
|
|
549
464
|
--max-num-seqs: 256
|
|
550
|
-
--compilation-config: 3
|
|
551
465
|
Mistral-Large-Instruct-2407:
|
|
552
466
|
model_family: Mistral
|
|
553
467
|
model_variant: Large-Instruct-2407
|
|
@@ -555,9 +469,8 @@ models:
|
|
|
555
469
|
gpus_per_node: 4
|
|
556
470
|
num_nodes: 2
|
|
557
471
|
vocab_size: 32768
|
|
558
|
-
qos: m2
|
|
559
472
|
time: 08:00:00
|
|
560
|
-
|
|
473
|
+
resource_type: l40s
|
|
561
474
|
vllm_args:
|
|
562
475
|
--pipeline-parallel-size: 2
|
|
563
476
|
--tensor-parallel-size: 4
|
|
@@ -570,9 +483,8 @@ models:
|
|
|
570
483
|
gpus_per_node: 4
|
|
571
484
|
num_nodes: 2
|
|
572
485
|
vocab_size: 32768
|
|
573
|
-
qos: m2
|
|
574
486
|
time: 08:00:00
|
|
575
|
-
|
|
487
|
+
resource_type: l40s
|
|
576
488
|
vllm_args:
|
|
577
489
|
--pipeline-parallel-size: 2
|
|
578
490
|
--tensor-parallel-size: 4
|
|
@@ -585,14 +497,12 @@ models:
|
|
|
585
497
|
gpus_per_node: 4
|
|
586
498
|
num_nodes: 1
|
|
587
499
|
vocab_size: 32000
|
|
588
|
-
qos: m2
|
|
589
500
|
time: 08:00:00
|
|
590
|
-
|
|
501
|
+
resource_type: l40s
|
|
591
502
|
vllm_args:
|
|
592
503
|
--tensor-parallel-size: 4
|
|
593
504
|
--max-model-len: 32768
|
|
594
505
|
--max-num-seqs: 256
|
|
595
|
-
--compilation-config: 3
|
|
596
506
|
Mixtral-8x22B-v0.1:
|
|
597
507
|
model_family: Mixtral
|
|
598
508
|
model_variant: 8x22B-v0.1
|
|
@@ -600,9 +510,8 @@ models:
|
|
|
600
510
|
gpus_per_node: 4
|
|
601
511
|
num_nodes: 2
|
|
602
512
|
vocab_size: 32768
|
|
603
|
-
qos: m2
|
|
604
513
|
time: 08:00:00
|
|
605
|
-
|
|
514
|
+
resource_type: l40s
|
|
606
515
|
vllm_args:
|
|
607
516
|
--pipeline-parallel-size: 2
|
|
608
517
|
--tensor-parallel-size: 4
|
|
@@ -615,9 +524,8 @@ models:
|
|
|
615
524
|
gpus_per_node: 4
|
|
616
525
|
num_nodes: 2
|
|
617
526
|
vocab_size: 32768
|
|
618
|
-
qos: m2
|
|
619
527
|
time: 08:00:00
|
|
620
|
-
|
|
528
|
+
resource_type: l40s
|
|
621
529
|
vllm_args:
|
|
622
530
|
--pipeline-parallel-size: 2
|
|
623
531
|
--tensor-parallel-size: 4
|
|
@@ -630,14 +538,12 @@ models:
|
|
|
630
538
|
gpus_per_node: 2
|
|
631
539
|
num_nodes: 1
|
|
632
540
|
vocab_size: 32064
|
|
633
|
-
qos: m2
|
|
634
541
|
time: 08:00:00
|
|
635
|
-
|
|
542
|
+
resource_type: l40s
|
|
636
543
|
vllm_args:
|
|
637
544
|
--tensor-parallel-size: 2
|
|
638
545
|
--max-model-len: 131072
|
|
639
546
|
--max-num-seqs: 256
|
|
640
|
-
--compilation-config: 3
|
|
641
547
|
Phi-3-vision-128k-instruct:
|
|
642
548
|
model_family: Phi-3-vision
|
|
643
549
|
model_variant: 128k-instruct
|
|
@@ -645,14 +551,12 @@ models:
|
|
|
645
551
|
gpus_per_node: 2
|
|
646
552
|
num_nodes: 1
|
|
647
553
|
vocab_size: 32064
|
|
648
|
-
qos: m2
|
|
649
554
|
time: 08:00:00
|
|
650
|
-
|
|
555
|
+
resource_type: l40s
|
|
651
556
|
vllm_args:
|
|
652
557
|
--tensor-parallel-size: 2
|
|
653
558
|
--max-model-len: 65536
|
|
654
559
|
--max-num-seqs: 256
|
|
655
|
-
--compilation-config: 3
|
|
656
560
|
Llama3-OpenBioLLM-70B:
|
|
657
561
|
model_family: Llama3-OpenBioLLM
|
|
658
562
|
model_variant: 70B
|
|
@@ -660,14 +564,12 @@ models:
|
|
|
660
564
|
gpus_per_node: 4
|
|
661
565
|
num_nodes: 1
|
|
662
566
|
vocab_size: 128256
|
|
663
|
-
qos: m2
|
|
664
567
|
time: 08:00:00
|
|
665
|
-
|
|
568
|
+
resource_type: l40s
|
|
666
569
|
vllm_args:
|
|
667
570
|
--tensor-parallel-size: 4
|
|
668
571
|
--max-model-len: 8192
|
|
669
572
|
--max-num-seqs: 256
|
|
670
|
-
--compilation-config: 3
|
|
671
573
|
Llama-3.1-Nemotron-70B-Instruct-HF:
|
|
672
574
|
model_family: Llama-3.1-Nemotron
|
|
673
575
|
model_variant: 70B-Instruct-HF
|
|
@@ -675,14 +577,12 @@ models:
|
|
|
675
577
|
gpus_per_node: 4
|
|
676
578
|
num_nodes: 1
|
|
677
579
|
vocab_size: 128256
|
|
678
|
-
qos: m2
|
|
679
580
|
time: 08:00:00
|
|
680
|
-
|
|
581
|
+
resource_type: l40s
|
|
681
582
|
vllm_args:
|
|
682
583
|
--tensor-parallel-size: 4
|
|
683
584
|
--max-model-len: 65536
|
|
684
585
|
--max-num-seqs: 256
|
|
685
|
-
--compilation-config: 3
|
|
686
586
|
Llama-3.2-1B:
|
|
687
587
|
model_family: Llama-3.2
|
|
688
588
|
model_variant: 1B
|
|
@@ -690,13 +590,11 @@ models:
|
|
|
690
590
|
gpus_per_node: 1
|
|
691
591
|
num_nodes: 1
|
|
692
592
|
vocab_size: 128256
|
|
693
|
-
qos: m2
|
|
694
593
|
time: 08:00:00
|
|
695
|
-
|
|
594
|
+
resource_type: l40s
|
|
696
595
|
vllm_args:
|
|
697
596
|
--max-model-len: 131072
|
|
698
597
|
--max-num-seqs: 256
|
|
699
|
-
--compilation-config: 3
|
|
700
598
|
Llama-3.2-1B-Instruct:
|
|
701
599
|
model_family: Llama-3.2
|
|
702
600
|
model_variant: 1B-Instruct
|
|
@@ -704,13 +602,11 @@ models:
|
|
|
704
602
|
gpus_per_node: 1
|
|
705
603
|
num_nodes: 1
|
|
706
604
|
vocab_size: 128256
|
|
707
|
-
qos: m2
|
|
708
605
|
time: 08:00:00
|
|
709
|
-
|
|
606
|
+
resource_type: l40s
|
|
710
607
|
vllm_args:
|
|
711
608
|
--max-model-len: 131072
|
|
712
609
|
--max-num-seqs: 256
|
|
713
|
-
--compilation-config: 3
|
|
714
610
|
Llama-3.2-3B:
|
|
715
611
|
model_family: Llama-3.2
|
|
716
612
|
model_variant: 3B
|
|
@@ -718,13 +614,11 @@ models:
|
|
|
718
614
|
gpus_per_node: 1
|
|
719
615
|
num_nodes: 1
|
|
720
616
|
vocab_size: 128256
|
|
721
|
-
qos: m2
|
|
722
617
|
time: 08:00:00
|
|
723
|
-
|
|
618
|
+
resource_type: l40s
|
|
724
619
|
vllm_args:
|
|
725
620
|
--max-model-len: 131072
|
|
726
621
|
--max-num-seqs: 256
|
|
727
|
-
--compilation-config: 3
|
|
728
622
|
Llama-3.2-3B-Instruct:
|
|
729
623
|
model_family: Llama-3.2
|
|
730
624
|
model_variant: 3B-Instruct
|
|
@@ -732,13 +626,11 @@ models:
|
|
|
732
626
|
gpus_per_node: 1
|
|
733
627
|
num_nodes: 1
|
|
734
628
|
vocab_size: 128256
|
|
735
|
-
qos: m2
|
|
736
629
|
time: 08:00:00
|
|
737
|
-
|
|
630
|
+
resource_type: l40s
|
|
738
631
|
vllm_args:
|
|
739
632
|
--max-model-len: 131072
|
|
740
633
|
--max-num-seqs: 256
|
|
741
|
-
--compilation-config: 3
|
|
742
634
|
Llama-3.2-11B-Vision:
|
|
743
635
|
model_family: Llama-3.2
|
|
744
636
|
model_variant: 11B-Vision
|
|
@@ -746,14 +638,12 @@ models:
|
|
|
746
638
|
gpus_per_node: 2
|
|
747
639
|
num_nodes: 1
|
|
748
640
|
vocab_size: 128256
|
|
749
|
-
qos: m2
|
|
750
641
|
time: 08:00:00
|
|
751
|
-
|
|
642
|
+
resource_type: l40s
|
|
752
643
|
vllm_args:
|
|
753
644
|
--tensor-parallel-size: 2
|
|
754
645
|
--max-model-len: 4096
|
|
755
646
|
--max-num-seqs: 64
|
|
756
|
-
--compilation-config: 3
|
|
757
647
|
--enforce-eager: true
|
|
758
648
|
Llama-3.2-11B-Vision-Instruct:
|
|
759
649
|
model_family: Llama-3.2
|
|
@@ -762,14 +652,12 @@ models:
|
|
|
762
652
|
gpus_per_node: 2
|
|
763
653
|
num_nodes: 1
|
|
764
654
|
vocab_size: 128256
|
|
765
|
-
qos: m2
|
|
766
655
|
time: 08:00:00
|
|
767
|
-
|
|
656
|
+
resource_type: l40s
|
|
768
657
|
vllm_args:
|
|
769
658
|
--tensor-parallel-size: 2
|
|
770
659
|
--max-model-len: 4096
|
|
771
660
|
--max-num-seqs: 64
|
|
772
|
-
--compilation-config: 3
|
|
773
661
|
--enforce-eager: true
|
|
774
662
|
Llama-3.2-90B-Vision:
|
|
775
663
|
model_family: Llama-3.2
|
|
@@ -778,14 +666,12 @@ models:
|
|
|
778
666
|
gpus_per_node: 4
|
|
779
667
|
num_nodes: 2
|
|
780
668
|
vocab_size: 128256
|
|
781
|
-
qos: m2
|
|
782
669
|
time: 08:00:00
|
|
783
|
-
|
|
670
|
+
resource_type: l40s
|
|
784
671
|
vllm_args:
|
|
785
672
|
--tensor-parallel-size: 8
|
|
786
673
|
--max-model-len: 4096
|
|
787
674
|
--max-num-seqs: 32
|
|
788
|
-
--compilation-config: 3
|
|
789
675
|
--enforce-eager: true
|
|
790
676
|
Llama-3.2-90B-Vision-Instruct:
|
|
791
677
|
model_family: Llama-3.2
|
|
@@ -794,14 +680,12 @@ models:
|
|
|
794
680
|
gpus_per_node: 4
|
|
795
681
|
num_nodes: 2
|
|
796
682
|
vocab_size: 128256
|
|
797
|
-
qos: m2
|
|
798
683
|
time: 08:00:00
|
|
799
|
-
|
|
684
|
+
resource_type: l40s
|
|
800
685
|
vllm_args:
|
|
801
686
|
--tensor-parallel-size: 8
|
|
802
687
|
--max-model-len: 4096
|
|
803
688
|
--max-num-seqs: 32
|
|
804
|
-
--compilation-config: 3
|
|
805
689
|
--enforce-eager: true
|
|
806
690
|
Qwen2.5-0.5B-Instruct:
|
|
807
691
|
model_family: Qwen2.5
|
|
@@ -810,13 +694,11 @@ models:
|
|
|
810
694
|
gpus_per_node: 1
|
|
811
695
|
num_nodes: 1
|
|
812
696
|
vocab_size: 152064
|
|
813
|
-
qos: m2
|
|
814
697
|
time: 08:00:00
|
|
815
|
-
|
|
698
|
+
resource_type: l40s
|
|
816
699
|
vllm_args:
|
|
817
700
|
--max-model-len: 32768
|
|
818
701
|
--max-num-seqs: 256
|
|
819
|
-
--compilation-config: 3
|
|
820
702
|
Qwen2.5-1.5B-Instruct:
|
|
821
703
|
model_family: Qwen2.5
|
|
822
704
|
model_variant: 1.5B-Instruct
|
|
@@ -824,13 +706,11 @@ models:
|
|
|
824
706
|
gpus_per_node: 1
|
|
825
707
|
num_nodes: 1
|
|
826
708
|
vocab_size: 152064
|
|
827
|
-
qos: m2
|
|
828
709
|
time: 08:00:00
|
|
829
|
-
|
|
710
|
+
resource_type: l40s
|
|
830
711
|
vllm_args:
|
|
831
712
|
--max-model-len: 32768
|
|
832
713
|
--max-num-seqs: 256
|
|
833
|
-
--compilation-config: 3
|
|
834
714
|
Qwen2.5-3B-Instruct:
|
|
835
715
|
model_family: Qwen2.5
|
|
836
716
|
model_variant: 3B-Instruct
|
|
@@ -838,13 +718,11 @@ models:
|
|
|
838
718
|
gpus_per_node: 1
|
|
839
719
|
num_nodes: 1
|
|
840
720
|
vocab_size: 152064
|
|
841
|
-
qos: m2
|
|
842
721
|
time: 08:00:00
|
|
843
|
-
|
|
722
|
+
resource_type: l40s
|
|
844
723
|
vllm_args:
|
|
845
724
|
--max-model-len: 32768
|
|
846
725
|
--max-num-seqs: 256
|
|
847
|
-
--compilation-config: 3
|
|
848
726
|
Qwen2.5-7B-Instruct:
|
|
849
727
|
model_family: Qwen2.5
|
|
850
728
|
model_variant: 7B-Instruct
|
|
@@ -852,13 +730,11 @@ models:
|
|
|
852
730
|
gpus_per_node: 1
|
|
853
731
|
num_nodes: 1
|
|
854
732
|
vocab_size: 152064
|
|
855
|
-
qos: m2
|
|
856
733
|
time: 08:00:00
|
|
857
|
-
|
|
734
|
+
resource_type: l40s
|
|
858
735
|
vllm_args:
|
|
859
736
|
--max-model-len: 32768
|
|
860
737
|
--max-num-seqs: 256
|
|
861
|
-
--compilation-config: 3
|
|
862
738
|
Qwen2.5-14B-Instruct:
|
|
863
739
|
model_family: Qwen2.5
|
|
864
740
|
model_variant: 14B-Instruct
|
|
@@ -866,13 +742,11 @@ models:
|
|
|
866
742
|
gpus_per_node: 1
|
|
867
743
|
num_nodes: 1
|
|
868
744
|
vocab_size: 152064
|
|
869
|
-
qos: m2
|
|
870
745
|
time: 08:00:00
|
|
871
|
-
|
|
746
|
+
resource_type: l40s
|
|
872
747
|
vllm_args:
|
|
873
748
|
--max-model-len: 32768
|
|
874
749
|
--max-num-seqs: 256
|
|
875
|
-
--compilation-config: 3
|
|
876
750
|
Qwen2.5-32B-Instruct:
|
|
877
751
|
model_family: Qwen2.5
|
|
878
752
|
model_variant: 32B-Instruct
|
|
@@ -880,14 +754,12 @@ models:
|
|
|
880
754
|
gpus_per_node: 2
|
|
881
755
|
num_nodes: 1
|
|
882
756
|
vocab_size: 152064
|
|
883
|
-
qos: m2
|
|
884
757
|
time: 08:00:00
|
|
885
|
-
|
|
758
|
+
resource_type: l40s
|
|
886
759
|
vllm_args:
|
|
887
760
|
--tensor-parallel-size: 2
|
|
888
761
|
--max-model-len: 32768
|
|
889
762
|
--max-num-seqs: 256
|
|
890
|
-
--compilation-config: 3
|
|
891
763
|
Qwen2.5-72B-Instruct:
|
|
892
764
|
model_family: Qwen2.5
|
|
893
765
|
model_variant: 72B-Instruct
|
|
@@ -895,14 +767,12 @@ models:
|
|
|
895
767
|
gpus_per_node: 4
|
|
896
768
|
num_nodes: 1
|
|
897
769
|
vocab_size: 152064
|
|
898
|
-
qos: m2
|
|
899
770
|
time: 08:00:00
|
|
900
|
-
|
|
771
|
+
resource_type: l40s
|
|
901
772
|
vllm_args:
|
|
902
773
|
--tensor-parallel-size: 4
|
|
903
774
|
--max-model-len: 16384
|
|
904
775
|
--max-num-seqs: 256
|
|
905
|
-
--compilation-config: 3
|
|
906
776
|
Qwen2.5-Math-1.5B-Instruct:
|
|
907
777
|
model_family: Qwen2.5
|
|
908
778
|
model_variant: Math-1.5B-Instruct
|
|
@@ -910,13 +780,11 @@ models:
|
|
|
910
780
|
gpus_per_node: 1
|
|
911
781
|
num_nodes: 1
|
|
912
782
|
vocab_size: 152064
|
|
913
|
-
qos: m2
|
|
914
783
|
time: 08:00:00
|
|
915
|
-
|
|
784
|
+
resource_type: l40s
|
|
916
785
|
vllm_args:
|
|
917
786
|
--max-model-len: 4096
|
|
918
787
|
--max-num-seqs: 256
|
|
919
|
-
--compilation-config: 3
|
|
920
788
|
Qwen2.5-Math-7B-Instruct:
|
|
921
789
|
model_family: Qwen2.5
|
|
922
790
|
model_variant: Math-7B-Instruct
|
|
@@ -924,13 +792,11 @@ models:
|
|
|
924
792
|
gpus_per_node: 1
|
|
925
793
|
num_nodes: 1
|
|
926
794
|
vocab_size: 152064
|
|
927
|
-
qos: m2
|
|
928
795
|
time: 08:00:00
|
|
929
|
-
|
|
796
|
+
resource_type: l40s
|
|
930
797
|
vllm_args:
|
|
931
798
|
--max-model-len: 4096
|
|
932
799
|
--max-num-seqs: 256
|
|
933
|
-
--compilation-config: 3
|
|
934
800
|
Qwen2.5-Math-72B-Instruct:
|
|
935
801
|
model_family: Qwen2.5
|
|
936
802
|
model_variant: Math-72B-Instruct
|
|
@@ -938,14 +804,12 @@ models:
|
|
|
938
804
|
gpus_per_node: 4
|
|
939
805
|
num_nodes: 1
|
|
940
806
|
vocab_size: 152064
|
|
941
|
-
qos: m2
|
|
942
807
|
time: 08:00:00
|
|
943
|
-
|
|
808
|
+
resource_type: l40s
|
|
944
809
|
vllm_args:
|
|
945
810
|
--tensor-parallel-size: 4
|
|
946
811
|
--max-model-len: 4096
|
|
947
812
|
--max-num-seqs: 256
|
|
948
|
-
--compilation-config: 3
|
|
949
813
|
Qwen2.5-Coder-7B-Instruct:
|
|
950
814
|
model_family: Qwen2.5
|
|
951
815
|
model_variant: Coder-7B-Instruct
|
|
@@ -953,13 +817,11 @@ models:
|
|
|
953
817
|
gpus_per_node: 1
|
|
954
818
|
num_nodes: 1
|
|
955
819
|
vocab_size: 152064
|
|
956
|
-
qos: m2
|
|
957
820
|
time: 08:00:00
|
|
958
|
-
|
|
821
|
+
resource_type: l40s
|
|
959
822
|
vllm_args:
|
|
960
823
|
--max-model-len: 32768
|
|
961
824
|
--max-num-seqs: 256
|
|
962
|
-
--compilation-config: 3
|
|
963
825
|
Qwen2.5-Math-RM-72B:
|
|
964
826
|
model_family: Qwen2.5
|
|
965
827
|
model_variant: Math-RM-72B
|
|
@@ -967,14 +829,12 @@ models:
|
|
|
967
829
|
gpus_per_node: 4
|
|
968
830
|
num_nodes: 1
|
|
969
831
|
vocab_size: 152064
|
|
970
|
-
qos: m2
|
|
971
832
|
time: 08:00:00
|
|
972
|
-
|
|
833
|
+
resource_type: l40s
|
|
973
834
|
vllm_args:
|
|
974
835
|
--tensor-parallel-size: 4
|
|
975
836
|
--max-model-len: 4096
|
|
976
837
|
--max-num-seqs: 256
|
|
977
|
-
--compilation-config: 3
|
|
978
838
|
Qwen2.5-Math-PRM-7B:
|
|
979
839
|
model_family: Qwen2.5
|
|
980
840
|
model_variant: Math-PRM-7B
|
|
@@ -982,28 +842,24 @@ models:
|
|
|
982
842
|
gpus_per_node: 1
|
|
983
843
|
num_nodes: 1
|
|
984
844
|
vocab_size: 152064
|
|
985
|
-
qos: m2
|
|
986
845
|
time: 08:00:00
|
|
987
|
-
|
|
846
|
+
resource_type: l40s
|
|
988
847
|
vllm_args:
|
|
989
848
|
--max-model-len: 4096
|
|
990
849
|
--max-num-seqs: 256
|
|
991
|
-
|
|
992
|
-
QwQ-32B-Preview:
|
|
850
|
+
QwQ-32B:
|
|
993
851
|
model_family: QwQ
|
|
994
|
-
model_variant: 32B
|
|
852
|
+
model_variant: 32B
|
|
995
853
|
model_type: LLM
|
|
996
854
|
gpus_per_node: 2
|
|
997
855
|
num_nodes: 1
|
|
998
856
|
vocab_size: 152064
|
|
999
|
-
qos: m2
|
|
1000
857
|
time: 08:00:00
|
|
1001
|
-
|
|
858
|
+
resource_type: l40s
|
|
1002
859
|
vllm_args:
|
|
1003
860
|
--tensor-parallel-size: 2
|
|
1004
861
|
--max-model-len: 32768
|
|
1005
862
|
--max-num-seqs: 256
|
|
1006
|
-
--compilation-config: 3
|
|
1007
863
|
Pixtral-12B-2409:
|
|
1008
864
|
model_family: Pixtral
|
|
1009
865
|
model_variant: 12B-2409
|
|
@@ -1011,13 +867,11 @@ models:
|
|
|
1011
867
|
gpus_per_node: 1
|
|
1012
868
|
num_nodes: 1
|
|
1013
869
|
vocab_size: 131072
|
|
1014
|
-
qos: m2
|
|
1015
870
|
time: 08:00:00
|
|
1016
|
-
|
|
871
|
+
resource_type: l40s
|
|
1017
872
|
vllm_args:
|
|
1018
873
|
--max-model-len: 8192
|
|
1019
874
|
--max-num-seqs: 256
|
|
1020
|
-
--compilation-config: 3
|
|
1021
875
|
e5-mistral-7b-instruct:
|
|
1022
876
|
model_family: e5
|
|
1023
877
|
model_variant: mistral-7b-instruct
|
|
@@ -1025,13 +879,11 @@ models:
|
|
|
1025
879
|
gpus_per_node: 1
|
|
1026
880
|
num_nodes: 1
|
|
1027
881
|
vocab_size: 32000
|
|
1028
|
-
qos: m2
|
|
1029
882
|
time: 08:00:00
|
|
1030
|
-
|
|
883
|
+
resource_type: l40s
|
|
1031
884
|
vllm_args:
|
|
1032
885
|
--max-model-len: 4096
|
|
1033
886
|
--max-num-seqs: 256
|
|
1034
|
-
--compilation-config: 3
|
|
1035
887
|
bge-base-en-v1.5:
|
|
1036
888
|
model_family: bge
|
|
1037
889
|
model_variant: base-en-v1.5
|
|
@@ -1039,13 +891,11 @@ models:
|
|
|
1039
891
|
gpus_per_node: 1
|
|
1040
892
|
num_nodes: 1
|
|
1041
893
|
vocab_size: 30522
|
|
1042
|
-
qos: m2
|
|
1043
894
|
time: 08:00:00
|
|
1044
|
-
|
|
895
|
+
resource_type: l40s
|
|
1045
896
|
vllm_args:
|
|
1046
897
|
--max-model-len: 512
|
|
1047
898
|
--max-num-seqs: 256
|
|
1048
|
-
--compilation-config: 3
|
|
1049
899
|
all-MiniLM-L6-v2:
|
|
1050
900
|
model_family: all-MiniLM
|
|
1051
901
|
model_variant: L6-v2
|
|
@@ -1053,13 +903,11 @@ models:
|
|
|
1053
903
|
gpus_per_node: 1
|
|
1054
904
|
num_nodes: 1
|
|
1055
905
|
vocab_size: 30522
|
|
1056
|
-
qos: m2
|
|
1057
906
|
time: 08:00:00
|
|
1058
|
-
|
|
907
|
+
resource_type: l40s
|
|
1059
908
|
vllm_args:
|
|
1060
909
|
--max-model-len: 512
|
|
1061
910
|
--max-num-seqs: 256
|
|
1062
|
-
--compilation-config: 3
|
|
1063
911
|
Llama-3.3-70B-Instruct:
|
|
1064
912
|
model_family: Llama-3.3
|
|
1065
913
|
model_variant: 70B-Instruct
|
|
@@ -1067,14 +915,12 @@ models:
|
|
|
1067
915
|
gpus_per_node: 4
|
|
1068
916
|
num_nodes: 1
|
|
1069
917
|
vocab_size: 128256
|
|
1070
|
-
qos: m2
|
|
1071
918
|
time: 08:00:00
|
|
1072
|
-
|
|
919
|
+
resource_type: l40s
|
|
1073
920
|
vllm_args:
|
|
1074
921
|
--tensor-parallel-size: 4
|
|
1075
922
|
--max-model-len: 65536
|
|
1076
923
|
--max-num-seqs: 256
|
|
1077
|
-
--compilation-config: 3
|
|
1078
924
|
InternVL2_5-26B:
|
|
1079
925
|
model_family: InternVL2_5
|
|
1080
926
|
model_variant: 26B
|
|
@@ -1082,14 +928,12 @@ models:
|
|
|
1082
928
|
gpus_per_node: 2
|
|
1083
929
|
num_nodes: 1
|
|
1084
930
|
vocab_size: 92553
|
|
1085
|
-
qos: m2
|
|
1086
931
|
time: 08:00:00
|
|
1087
|
-
|
|
932
|
+
resource_type: l40s
|
|
1088
933
|
vllm_args:
|
|
1089
934
|
--tensor-parallel-size: 2
|
|
1090
935
|
--max-model-len: 32768
|
|
1091
936
|
--max-num-seqs: 256
|
|
1092
|
-
--compilation-config: 3
|
|
1093
937
|
InternVL2_5-38B:
|
|
1094
938
|
model_family: InternVL2_5
|
|
1095
939
|
model_variant: 38B
|
|
@@ -1097,14 +941,12 @@ models:
|
|
|
1097
941
|
gpus_per_node: 4
|
|
1098
942
|
num_nodes: 1
|
|
1099
943
|
vocab_size: 92553
|
|
1100
|
-
qos: m2
|
|
1101
944
|
time: 08:00:00
|
|
1102
|
-
|
|
945
|
+
resource_type: l40s
|
|
1103
946
|
vllm_args:
|
|
1104
947
|
--tensor-parallel-size: 4
|
|
1105
948
|
--max-model-len: 32768
|
|
1106
949
|
--max-num-seqs: 256
|
|
1107
|
-
--compilation-config: 3
|
|
1108
950
|
Aya-Expanse-32B:
|
|
1109
951
|
model_family: Aya-Expanse
|
|
1110
952
|
model_variant: 32B
|
|
@@ -1112,14 +954,12 @@ models:
|
|
|
1112
954
|
gpus_per_node: 2
|
|
1113
955
|
num_nodes: 1
|
|
1114
956
|
vocab_size: 256000
|
|
1115
|
-
qos: m2
|
|
1116
957
|
time: 08:00:00
|
|
1117
|
-
|
|
958
|
+
resource_type: l40s
|
|
1118
959
|
vllm_args:
|
|
1119
960
|
--tensor-parallel-size: 2
|
|
1120
961
|
--max-model-len: 8192
|
|
1121
962
|
--max-num-seqs: 256
|
|
1122
|
-
--compilation-config: 3
|
|
1123
963
|
DeepSeek-R1-Distill-Llama-70B:
|
|
1124
964
|
model_family: DeepSeek-R1
|
|
1125
965
|
model_variant: Distill-Llama-70B
|
|
@@ -1127,14 +967,12 @@ models:
|
|
|
1127
967
|
gpus_per_node: 4
|
|
1128
968
|
num_nodes: 1
|
|
1129
969
|
vocab_size: 128256
|
|
1130
|
-
qos: m2
|
|
1131
970
|
time: 08:00:00
|
|
1132
|
-
|
|
971
|
+
resource_type: l40s
|
|
1133
972
|
vllm_args:
|
|
1134
973
|
--tensor-parallel-size: 4
|
|
1135
974
|
--max-model-len: 65536
|
|
1136
975
|
--max-num-seqs: 256
|
|
1137
|
-
--compilation-config: 3
|
|
1138
976
|
DeepSeek-R1-Distill-Llama-8B:
|
|
1139
977
|
model_family: DeepSeek-R1
|
|
1140
978
|
model_variant: Distill-Llama-8B
|
|
@@ -1142,13 +980,11 @@ models:
|
|
|
1142
980
|
gpus_per_node: 1
|
|
1143
981
|
num_nodes: 1
|
|
1144
982
|
vocab_size: 128256
|
|
1145
|
-
qos: m2
|
|
1146
983
|
time: 08:00:00
|
|
1147
|
-
|
|
984
|
+
resource_type: l40s
|
|
1148
985
|
vllm_args:
|
|
1149
986
|
--max-model-len: 131072
|
|
1150
987
|
--max-num-seqs: 256
|
|
1151
|
-
--compilation-config: 3
|
|
1152
988
|
DeepSeek-R1-Distill-Qwen-32B:
|
|
1153
989
|
model_family: DeepSeek-R1
|
|
1154
990
|
model_variant: Distill-Qwen-32B
|
|
@@ -1156,14 +992,12 @@ models:
|
|
|
1156
992
|
gpus_per_node: 2
|
|
1157
993
|
num_nodes: 1
|
|
1158
994
|
vocab_size: 152064
|
|
1159
|
-
qos: m2
|
|
1160
995
|
time: 08:00:00
|
|
1161
|
-
|
|
996
|
+
resource_type: l40s
|
|
1162
997
|
vllm_args:
|
|
1163
998
|
--tensor-parallel-size: 2
|
|
1164
999
|
--max-model-len: 65536
|
|
1165
1000
|
--max-num-seqs: 256
|
|
1166
|
-
--compilation-config: 3
|
|
1167
1001
|
DeepSeek-R1-Distill-Qwen-14B:
|
|
1168
1002
|
model_family: DeepSeek-R1
|
|
1169
1003
|
model_variant: Distill-Qwen-14B
|
|
@@ -1171,13 +1005,11 @@ models:
|
|
|
1171
1005
|
gpus_per_node: 1
|
|
1172
1006
|
num_nodes: 1
|
|
1173
1007
|
vocab_size: 152064
|
|
1174
|
-
qos: m2
|
|
1175
1008
|
time: 08:00:00
|
|
1176
|
-
|
|
1009
|
+
resource_type: l40s
|
|
1177
1010
|
vllm_args:
|
|
1178
1011
|
--max-model-len: 65536
|
|
1179
1012
|
--max-num-seqs: 256
|
|
1180
|
-
--compilation-config: 3
|
|
1181
1013
|
DeepSeek-R1-Distill-Qwen-7B:
|
|
1182
1014
|
model_family: DeepSeek-R1
|
|
1183
1015
|
model_variant: Distill-Qwen-7B
|
|
@@ -1185,13 +1017,11 @@ models:
|
|
|
1185
1017
|
gpus_per_node: 1
|
|
1186
1018
|
num_nodes: 1
|
|
1187
1019
|
vocab_size: 152064
|
|
1188
|
-
qos: m2
|
|
1189
1020
|
time: 08:00:00
|
|
1190
|
-
|
|
1021
|
+
resource_type: l40s
|
|
1191
1022
|
vllm_args:
|
|
1192
1023
|
--max-model-len: 131072
|
|
1193
1024
|
--max-num-seqs: 256
|
|
1194
|
-
--compilation-config: 3
|
|
1195
1025
|
DeepSeek-R1-Distill-Qwen-1.5B:
|
|
1196
1026
|
model_family: DeepSeek-R1
|
|
1197
1027
|
model_variant: Distill-Qwen-1.5B
|
|
@@ -1199,13 +1029,11 @@ models:
|
|
|
1199
1029
|
gpus_per_node: 1
|
|
1200
1030
|
num_nodes: 1
|
|
1201
1031
|
vocab_size: 152064
|
|
1202
|
-
qos: m2
|
|
1203
1032
|
time: 08:00:00
|
|
1204
|
-
|
|
1033
|
+
resource_type: l40s
|
|
1205
1034
|
vllm_args:
|
|
1206
1035
|
--max-model-len: 131072
|
|
1207
1036
|
--max-num-seqs: 256
|
|
1208
|
-
--compilation-config: 3
|
|
1209
1037
|
Phi-3.5-vision-instruct:
|
|
1210
1038
|
model_family: Phi-3.5-vision
|
|
1211
1039
|
model_variant: instruct
|
|
@@ -1213,14 +1041,12 @@ models:
|
|
|
1213
1041
|
gpus_per_node: 2
|
|
1214
1042
|
num_nodes: 1
|
|
1215
1043
|
vocab_size: 32064
|
|
1216
|
-
qos: m2
|
|
1217
1044
|
time: 08:00:00
|
|
1218
|
-
|
|
1045
|
+
resource_type: l40s
|
|
1219
1046
|
vllm_args:
|
|
1220
1047
|
--tensor-parallel-size: 2
|
|
1221
1048
|
--max-model-len: 65536
|
|
1222
1049
|
--max-num-seqs: 256
|
|
1223
|
-
--compilation-config: 3
|
|
1224
1050
|
InternVL2_5-8B:
|
|
1225
1051
|
model_family: InternVL2_5
|
|
1226
1052
|
model_variant: 8B
|
|
@@ -1228,13 +1054,11 @@ models:
|
|
|
1228
1054
|
gpus_per_node: 1
|
|
1229
1055
|
num_nodes: 1
|
|
1230
1056
|
vocab_size: 92553
|
|
1231
|
-
qos: m2
|
|
1232
1057
|
time: 08:00:00
|
|
1233
|
-
|
|
1058
|
+
resource_type: l40s
|
|
1234
1059
|
vllm_args:
|
|
1235
1060
|
--max-model-len: 32768
|
|
1236
1061
|
--max-num-seqs: 256
|
|
1237
|
-
--compilation-config: 3
|
|
1238
1062
|
glm-4v-9b:
|
|
1239
1063
|
model_family: glm-4v
|
|
1240
1064
|
model_variant: 9b
|
|
@@ -1242,13 +1066,11 @@ models:
|
|
|
1242
1066
|
gpus_per_node: 1
|
|
1243
1067
|
num_nodes: 1
|
|
1244
1068
|
vocab_size: 151552
|
|
1245
|
-
qos: m2
|
|
1246
1069
|
time: 08:00:00
|
|
1247
|
-
|
|
1070
|
+
resource_type: l40s
|
|
1248
1071
|
vllm_args:
|
|
1249
1072
|
--max-model-len: 8192
|
|
1250
1073
|
--max-num-seqs: 256
|
|
1251
|
-
--compilation-config: 3
|
|
1252
1074
|
Molmo-7B-D-0924:
|
|
1253
1075
|
model_family: Molmo
|
|
1254
1076
|
model_variant: 7B-D-0924
|
|
@@ -1256,27 +1078,23 @@ models:
|
|
|
1256
1078
|
gpus_per_node: 1
|
|
1257
1079
|
num_nodes: 1
|
|
1258
1080
|
vocab_size: 152064
|
|
1259
|
-
qos: m2
|
|
1260
1081
|
time: 08:00:00
|
|
1261
|
-
|
|
1082
|
+
resource_type: l40s
|
|
1262
1083
|
vllm_args:
|
|
1263
1084
|
--max-model-len: 4096
|
|
1264
1085
|
--max-num-seqs: 256
|
|
1265
|
-
--compilation-config: 3
|
|
1266
1086
|
deepseek-vl2:
|
|
1267
1087
|
model_family: deepseek-vl2
|
|
1268
1088
|
model_type: VLM
|
|
1269
1089
|
gpus_per_node: 2
|
|
1270
1090
|
num_nodes: 1
|
|
1271
1091
|
vocab_size: 129280
|
|
1272
|
-
qos: m2
|
|
1273
1092
|
time: 08:00:00
|
|
1274
|
-
|
|
1093
|
+
resource_type: l40s
|
|
1275
1094
|
vllm_args:
|
|
1276
1095
|
--tensor-parallel-size: 2
|
|
1277
1096
|
--max-model-len: 4096
|
|
1278
1097
|
--max-num-seqs: 256
|
|
1279
|
-
--compilation-config: 3
|
|
1280
1098
|
deepseek-vl2-small:
|
|
1281
1099
|
model_family: deepseek-vl2
|
|
1282
1100
|
model_variant: small
|
|
@@ -1284,10 +1102,20 @@ models:
|
|
|
1284
1102
|
gpus_per_node: 1
|
|
1285
1103
|
num_nodes: 1
|
|
1286
1104
|
vocab_size: 129280
|
|
1287
|
-
qos: m2
|
|
1288
1105
|
time: 08:00:00
|
|
1289
|
-
|
|
1106
|
+
resource_type: l40s
|
|
1290
1107
|
vllm_args:
|
|
1291
1108
|
--max-model-len: 4096
|
|
1292
1109
|
--max-num-seqs: 256
|
|
1293
|
-
|
|
1110
|
+
Qwen3-14B:
|
|
1111
|
+
model_family: Qwen3
|
|
1112
|
+
model_variant: 14B
|
|
1113
|
+
model_type: LLM
|
|
1114
|
+
gpus_per_node: 1
|
|
1115
|
+
num_nodes: 1
|
|
1116
|
+
vocab_size: 151936
|
|
1117
|
+
time: 08:00:00
|
|
1118
|
+
resource_type: l40s
|
|
1119
|
+
vllm_args:
|
|
1120
|
+
--max-model-len: 40960
|
|
1121
|
+
--max-num-seqs: 256
|